// Copyright (C) 1987-1995 by Symantec
// Copyright (C) 2000-2009 by Digital Mars
// All Rights Reserved
// http://www.digitalmars.com
// Written by Walter Bright
/*
 * This source file is made available for personal use
 * only. The license is in /dmd/src/dmd/backendlicense.txt
 * For any other uses, please contact Digital Mars.
 */

#if !SPP

#include	<stdio.h>
#include	<string.h>
#include	<time.h>
#include	<math.h>
#include	"cc.h"
#include	"el.h"
#include	"oper.h"
#include	"code.h"
#include	"global.h"

static char __file__[] = __FILE__;	/* for tassert.h		*/
#include	"tassert.h"

// Constants that the 8087 supports directly
// BUG: rewrite for 80 bit long doubles
#define PI		3.14159265358979323846
#define LOG2		0.30102999566398119521
#define LN2		0.6931471805599453094172321
#define LOG2T		3.32192809488736234787
#define LOG2E		1.4426950408889634074	/* 1/LN2		*/

#define FWAIT	0x9B		/* FWAIT opcode				*/

/* Mark variable referenced by e as not a register candidate		*/
#define notreg(e)	((e)->EV.sp.Vsym->Sflags &= ~GTregcand)

/* Generate the appropriate ESC instruction	*/
#define ESC(MF,b)	(0xD8 + ((MF) << 1) + (b))
// Memory-format selector consumed by ESC(): the value is shifted left by one
// bit and added to 0xD8 to form the escape opcode byte.
enum MF
{	// Values for MF
	MFfloat		= 0,	// ESC(MFfloat,1) == 0xD9, the opcode ndp_fstp()/ndp_fld() use for 32 bit reals
	MFlong		= 1,	// presumably 32 bit integer operands -- TODO confirm
	MFdouble	= 2,	// ESC(MFdouble,1) == 0xDD, the opcode ndp_fstp()/ndp_fld() use for 64 bit reals
	MFword		= 3	// presumably 16 bit integer operands -- TODO confirm
};

NDP _8087elems[8];		// model of the 8087 stack; entry i describes what is held in ST(i)
NDP ndp_zero;			// never written in the code reviewed here; copied into entries to mark them empty

int stackused = 0;		/* number of items on the 8087 stack	*/

/*********************************
 */

// NOTE(review): neither Dconst nor oldd is referenced in the part of the
// file reviewed here. Presumably they cache rounding-mode state that goes
// with CW_roundto0 / CW_roundtonearest -- confirm against genrnd().
struct Dconst
{
    int round;
    symbol *roundto0;
    symbol *roundtonearest;
};

static Dconst oldd;

#define NDPP	0	// print out debugging info

code *loadComplex(elem *e);
code *opmod_complex87(elem *e,regm_t *pretregs);
code *opass_complex87(elem *e,regm_t *pretregs);
STATIC code * genf2(code *c,unsigned op,unsigned rm);

#define CW_roundto0		0xFBF
#define CW_roundtonearest	0x3BF

STATIC code *genrnd(code *c, short cw);

/**********************************
 * When we need to temporarily save 8087 registers, we record information
 * about the save in an array of NDP structs:
 */

NDP *NDP::save = NULL;
int NDP::savemax = 0;		/* # of entries in NDP::save[]		*/
int NDP::savetop = 0;		/* # of entries used in NDP::save[]	*/

#ifdef DEBUG
#define NDPSAVEINC 2		/* flush reallocation bugs		*/
#else
#define NDPSAVEINC 8		/* allocation chunk sizes		*/
#endif

/****************************************
 * Store/load to ndp save location i
 */

code *ndp_fstp(code *c, int i, tym_t ty)
{
    unsigned op;	// escape opcode byte
    unsigned rm;	// mod/reg/rm byte selecting the store-and-pop form

    // The encoding depends only on the operand width; the imaginary and
    // complex flavors of a type share the case of their real counterpart.
    switch (tybasic(ty))
    {
	case TYfloat:
	case TYifloat:
	case TYcfloat:
	    op = 0xD9;				// FSTP m32real i[BP]
	    rm = modregrm(2,3,BPRM);
	    break;

	case TYdouble:
	case TYdouble_alias:
	case TYidouble:
	case TYcdouble:
	    op = 0xDD;				// FSTP m64real i[BP]
	    rm = modregrm(2,3,BPRM);
	    break;

	case TYldouble:
	case TYildouble:
	case TYcldouble:
	    op = 0xDB;				// FSTP m80real i[BP]
	    rm = modregrm(2,7,BPRM);
	    break;

	default:
	    assert(0);
	    return c;				// nothing is emitted for an unknown type
    }

    // Append the store to ndp save location i and hand back the code list
    return genc1(c,op,rm,FLndp,i);
}

code *ndp_fld(code *c, int i, tym_t ty)
{
    unsigned op;	// escape opcode byte
    unsigned rm;	// mod/reg/rm byte selecting the load form

    // Mirror image of ndp_fstp(): choose the load encoding by operand width.
    switch (tybasic(ty))
    {
	case TYfloat:
	case TYifloat:
	case TYcfloat:
	    op = 0xD9;				// 32 bit real load from i[BP]
	    rm = modregrm(2,0,BPRM);
	    break;

	case TYdouble:
	case TYdouble_alias:
	case TYidouble:
	case TYcdouble:
	    op = 0xDD;				// 64 bit real load from i[BP]
	    rm = modregrm(2,0,BPRM);
	    break;

	case TYldouble:
	case TYildouble:
	case TYcldouble:
	    op = 0xDB;				// FLD m80real i[BP]
	    rm = modregrm(2,5,BPRM);
	    break;

	default:
	    assert(0);
	    return c;				// nothing is emitted for an unknown type
    }

    // Append the load from ndp save location i and hand back the code list
    return genc1(c,op,rm,FLndp,i);
}

/**************************
 * Return index of empty slot in NDP::save[].
 */

STATIC int getemptyslot()
{
	int slot = 0;

	// Look for the first entry that is not in use (its elem pointer is NULL)
	while (slot < NDP::savemax && NDP::save[slot].e != NULL)
		slot++;

	if (slot >= NDP::savemax)
	{
		// Every entry is taken: grow NDP::save[] by one chunk and hand
		// out the first entry of the freshly zeroed tail.
		slot = NDP::savemax;
		NDP::save = (NDP *)mem_realloc(NDP::save,
			(NDP::savemax + NDPSAVEINC) * sizeof(*NDP::save));
		memset(NDP::save + NDP::savemax,0,NDPSAVEINC * sizeof(*NDP::save));
		NDP::savemax += NDPSAVEINC;
	}

	// savetop is kept one past the highest index ever handed out
	if (NDP::savetop <= slot)
		NDP::savetop = slot + 1;
	return slot;
}

/*********************************
 * Pop 8087 stack.
 */

#undef pop87

// Model a pop of the 8087 register stack. No code is generated here; only
// the bookkeeping (stackused and _8087elems[]) is updated.
// In DEBUG builds the function takes the caller's line and file, which the
// pop87() macro defined right after the parameter list supplies automatically.
void pop87(
#ifdef DEBUG
	int line, const char *file
#endif
	)
#ifdef DEBUG
#define pop87()	pop87(__LINE__,__FILE__)
#endif
{
	int i;

#if NDPP
	dbg_printf("pop87(%s(%d): stackused=%d)\n", file, line, stackused);
#endif
	--stackused;
	assert(stackused >= 0);		// popping an empty stack is a bookkeeping error
	// What was recorded for ST(i+1) is now recorded for ST(i)
	for (i = 0; i < arraysize(_8087elems) - 1; i++)
		_8087elems[i] = _8087elems[i + 1];
	/* end of stack is nothing	*/
	_8087elems[arraysize(_8087elems) - 1] = ndp_zero;
}

/*******************************
 * Push 8087 stack. Generate and return any code
 * necessary to preserve anything that might run off the end of the stack.
 */

code *push87()
{
	code *cspill = CNIL;	// code needed to rescue the bottom entry, if any
	int k;

	if (_8087elems[7].e == NULL)
	{
		// Room left: just account for one more item
		if (NDPP) dbg_printf("push87(%d)\n",stackused);
		stackused++;
		assert(stackused <= 8);
	}
	else
	{
		// The push would drop the last register off the stack, so park
		// its value in a save slot first.
		int slot = getemptyslot();

		NDP::save[slot] = _8087elems[7];
		cspill = genf2(cspill,0xD9,0xF6);		// FDECSTP
		cspill = genfwait(cspill);
		cspill = ndp_fstp(cspill, slot, _8087elems[7].e->Ety);	// FSTP slot[BP]
		assert(stackused == 8);
		if (NDPP) dbg_printf("push87() : overflow\n");
	}

	// Every recorded entry moves one position deeper; the new top is empty
	for (k = 7; k >= 1; k--)
		_8087elems[k] = _8087elems[k - 1];
	_8087elems[0] = ndp_zero;
	return cspill;
}

/*****************************
 * Record that elem e (at the given offset) is held in ST(i) and is a
 * value we want to keep.
 */

// Record in _8087elems[i] that ST(i) holds elem e at the given offset.
// DEBUG builds add a 4-argument overload carrying the caller's line number;
// the note87() macro below routes later 3-argument calls in this file to it,
// while the plain 3-argument function forwards with line 0.
#ifdef DEBUG
void note87(elem *e, unsigned offset, int i, int linnum);
void note87(elem *e, unsigned offset, int i)
{
    return note87(e, offset, i, 0);
}
void note87(elem *e, unsigned offset, int i, int linnum)
#define note87(e,offset,i) note87(e,offset,i,__LINE__)
#else
void note87(elem *e, unsigned offset, int i)
#endif
{
#if NDPP
	// NOTE(review): linnum is only a parameter in DEBUG builds, so enabling
	// NDPP without DEBUG would not compile.
	printf("note87(e = %p.%d, i = %d, stackused = %d, line = %d)\n",e,offset,i,stackused,linnum);
#endif
#if 0 && DEBUG
	if (_8087elems[i].e)
		printf("_8087elems[%d].e = %p\n",i,_8087elems[i].e);
#endif
	assert(i < stackused);		// can only note an entry that is on the stack
	_8087elems[i].e = e;
	_8087elems[i].offset = offset;
}

/****************************************************
 * Exchange two entries in 8087 stack.
 */

void xchg87(int i, int j)
{
    NDP save;

    save = _8087elems[i];
    _8087elems[i] = _8087elems[j];
    _8087elems[j] = save;
}

/****************************
 * Make sure that elem e is in register ST(i). Reload it if necessary.
 * Input:
 *	i	0..3	8087 register number
 *	flag	1	don't bother with FXCH
 */

// Returns the code (possibly CNIL) needed to get elem e / offset back into
// ST(i). If _8087elems[i] already records that pair nothing is generated;
// otherwise the value is looked up in NDP::save[] and reloaded from there.
// DEBUG builds carry the caller's line number via the makesure87() macro.
#ifdef DEBUG
STATIC code * makesure87(elem *e,unsigned offset,int i,unsigned flag,int linnum)
#define makesure87(e,offset,i,flag)	makesure87(e,offset,i,flag,__LINE__)
#else
STATIC code * makesure87(elem *e,unsigned offset,int i,unsigned flag)
#endif
{
	code *c;
	int j;

#ifdef DEBUG
	if (NDPP) printf("makesure87(e=%p, offset=%d, i=%d, flag=%d, line=%d)\n",e,offset,i,flag,linnum);
#endif
	assert(e && i < 4);
	c = CNIL;
    L1:
	if (_8087elems[i].e != e || _8087elems[i].offset != offset)
	{
#ifdef DEBUG
		if (_8087elems[i].e)
		    printf("_8087elems[%d].e = %p, .offset = %d\n",i,_8087elems[i].e,_8087elems[i].offset);
#endif
		// ST(i) must be recorded as empty if it does not hold e
		assert(_8087elems[i].e == NULL);
		// Search the save area for the entry that holds e / offset
		for (j = 0; 1; j++)
		{
		    // Not found and e is a comma expression: retry from the
		    // top with its right operand.
		    if (j >= NDP::savetop && e->Eoper == OPcomma)
		    {
			e = e->E2;		// try right side
			goto L1;
		    }
#ifdef DEBUG
		    if (j >= NDP::savetop)
			printf("e = %p, NDP::savetop = %d\n",e,NDP::savetop);
#endif
		    assert(j < NDP::savetop);	// the value has to be somewhere
		    //printf("\tNDP::save[%d] = %p, .offset = %d\n", j, NDP::save[j].e, NDP::save[j].offset);
		    if (e == NDP::save[j].e && offset == NDP::save[j].offset)
			break;
		}
		c = push87();
		c = genfwait(c);
		c = ndp_fld(c, j, e->Ety);		// FLD j[BP]
		if (!(flag & 1))
		{
		    // Emit FXCH ST(i), FXCH ST(i-1), ..., FXCH ST(1). The
		    // return value of genf2() is dropped, so this assumes c
		    // is non-NULL after ndp_fld() and that genf2() appends to it.
		    while (i != 0)
		    {
			genf2(c,0xD9,0xC8 + i);		// FXCH ST(i)
			i--;
		    }
		}
		NDP::save[j] = ndp_zero;		// back in 8087
	}
	//_8087elems[i].e = NULL;
	return c;
}

/****************************
 * Save in memory any values in the 8087 that we want to keep.
 */

code *save87()
{
	code *cstores = CNIL;

	// Spill ST0 for as long as the stack is non-empty and its top entry
	// is a noted value.
	while (stackused && _8087elems[0].e)
	{
		int slot = getemptyslot();

		if (NDPP) printf("saving %p in temporary NDP::save[%d]\n",_8087elems[0].e,slot);
		NDP::save[slot] = _8087elems[0];

		cstores = genfwait(cstores);
		cstores = ndp_fstp(cstores,slot,_8087elems[0].e->Ety);	// FSTP slot[BP]
		pop87();
	}

	// If anything was stored, wait for the last store to finish
	if (cstores != CNIL)
		genfwait(cstores);
	return cstores;
}

/******************************************
 * Save any noted values that would be destroyed by n pushes
 */

code *save87regs(unsigned n)
{
    code *c = NULL;
    unsigned keep;		// how many entries may remain on the stack
    unsigned k;

    assert(n <= 7);
    keep = 8 - n;

    // Nothing to do when n more pushes still fit
    if (stackused <= keep)
	return c;

    // Walk from the bottom of the stack upwards: rotate each position to
    // the top and, when it is occupied, store it into a save slot.
    k = 8;
    while (k > keep)
    {
	c = genf2(c,0xD9,0xF6);		// FDECSTP
	c = genfwait(c);
	if (k <= stackused)
	{
	    int slot = getemptyslot();
	    NDP *entry = &_8087elems[k - 1];

	    c = ndp_fstp(c, slot, entry->e->Ety);	// FSTP slot[BP]
	    NDP::save[slot] = *entry;
	    *entry = ndp_zero;
	}
	k--;
    }

    // Positions that were not occupied were only rotated, not popped:
    // undo each of those rotations.
    k = 8;
    while (k > keep)
    {
	if (k > stackused)
	{
	    c = genf2(c,0xD9,0xF7);	// FINCSTP
	    c = genfwait(c);
	}
	k--;
    }

    stackused = keep;
    return c;
}

/*************************************
 * Find which, if any, slot on stack holds elem e.
 */

STATIC int cse_get(elem *e, unsigned offset)
{
    int slot;

    // Only the first stackused entries are live; return the first one that
    // records exactly this elem / offset pair.
    for (slot = 0; slot != stackused; slot++)
    {
	if (_8087elems[slot].e == e && _8087elems[slot].offset == offset)
	    return slot;
    }

    // Not on the 8087 stack
    return -1;
}

/*************************************
 * Reload common subexpression.
 */

code *comsub87(elem *e,regm_t *pretregs)
{
    code *c;
    int re;		// stack index of e at offset 0, or -1

    // Look on 8087 stack
    re = cse_get(e, 0);

    if (!tycomplex(e->Ety))
    {
	if (re < 0)
	    c = loaddata(e,pretregs);		// not on the stack: reload
	else
	{
	    // Duplicate the copy that is already on the stack
	    c = push87();
	    c = genf2(c,0xD9,0xC0 + re);	// FLD ST(re)
	    c = cat2(c,fixresult(e,mST0,pretregs));
	}
    }
    else
    {
	// A complex value occupies two entries: offset 0 and offset size/2
	unsigned sz = tysize(e->Ety);
	int im = cse_get(e, sz / 2);

	if (re < 0 || im < 0)
	    c = loaddata(e,pretregs);		// need both halves: reload
	else
	{
	    c = push87();
	    c = cat2(c, push87());
	    c = genf2(c,0xD9,0xC0 + re);	// FLD ST(re)
	    c = genf2(c,0xD9,0xC0 + im + 1);	// FLD ST(im + 1)
	    c = cat2(c,fixresult_complex87(e,mST01,pretregs));
	}
    }

    freenode(e);
    return c;
}

/**************************
 * Generate code to deal with floatreg.
 */

code * genfltreg(code *c,unsigned opcode,unsigned reg,targ_size_t offset)
{
	// Note that floatreg is in use and that a local is referenced
	floatreg = TRUE;
	reflocal = TRUE;

	// Opcodes 0xD8..0xDF get the (optional) FWAIT in front of them
	if ((opcode & 0xF8) == 0xD8)
	{
	    c = genfwait(c);
	}

	// opcode reg,offset+floatreg[BP]
	c = genc1(c,opcode,modregrm(2,reg,BPRM),FLfltreg,offset);
	return c;
}

/*******************************
 * Decide if we need to gen an FWAIT.
 */

code *genfwait(code *c)
{
    // Append an FWAIT only when ADDFWAIT() says one is required;
    // otherwise the list is handed back untouched.
    return ADDFWAIT() ? gen1(c,FWAIT) : c;
}

/***************************************
 * Generate floating point instruction.
 */

STATIC code * genf2(code *c,unsigned op,unsigned rm)
{
    // Optional FWAIT first, then the two byte instruction itself
    code *cwait = genfwait(c);

    return gen2(cwait,op,rm);
}

/***************************
 * Put the 8087 flags into the CPU flags.
 */

STATIC code * cg87_87topsw(code *c)
{
	// AX is about to be overwritten with the status word
	c = cat2(c,getregs(mAX));

	if (config.target_cpu < TARGET_80286)
	{
	    // No FSTSW AX available: go through floatreg in memory
	    c = genfltreg(c,0xD8+5,7,0);	/* FSTSW floatreg[BP]	*/
	    genfwait(c);			/* FWAIT		*/
	    genfltreg(c,0x8A,4,1);		/* MOV AH,floatreg+1[BP] */
	}
	else
	{
	    c = genf2(c,0xDF,0xE0);		// FSTSW AX
	}

	gen1(c,0x9E);				// SAHF
	code_orflag(c,CFpsw);
	return c;
}

/***************************
 * Set the PSW based on the state of ST0.
 * Input:
 *	pop	if stack should be popped after test
 * Returns:
 *	start of code appended to c.
 */

STATIC code * genftst(code *c,elem *e,int pop)
{
    const int fastfloat = (config.flags4 & CFG4fastfloat) != 0;

    if (!fastfloat && config.target_cpu >= TARGET_80386)
    {
	// Compare against a freshly loaded zero.
	// FUCOMP doesn't raise exceptions on QNANs, unlike FTST
	c = cat2(c,push87());
	c = gen2(c,0xD9,0xEE);		// FLDZ
	gen2(c,pop ? 0xDA : 0xDD,0xE9);	// FUCOMPP / FUCOMP
	pop87();			// the zero is always consumed
	if (pop)
	    pop87();			// FUCOMPP consumed the operand too
	cg87_87topsw(c);		// put 8087 flags in CPU flags
	return c;
    }

    if (fastfloat)
    {
	// Fast floating point: plain FTST
	c = genf2(c,0xD9,0xE4);		// FTST
	c = cg87_87topsw(c);		// put 8087 flags in CPU flags
    }
    else
    {
	// Call library function which does not raise exceptions
	regm_t regm = 0;

	c = cat2(c,callclib(e,CLIBftest,&regm,0));
    }

    // Neither of the two paths above pops the operand by itself
    if (pop)
    {
	c = genf2(c,0xDD,modregrm(3,3,0));	// FPOP
	pop87();
    }
    return c;
}

/*************************************
 * Determine if there is a special 8087 instruction to load
 * constant e.
 * Input:
 *	im	0	load real part
 *		1	load imaginary part
 * Returns:
 *	opcode if found
 *	0 if not
 */

// Looks at the floating point constant in e (its real part when im == 0,
// its imaginary part when im == 1) and returns the opcode byte of the
// matching load-constant instruction, or 0 when there is none or when it is
// not worth using. The byte is the one that follows the 0xD9 escape, as in
// the FLDZ emitted by genftst().
//
// The constant is read through a copy whose width follows tybasic(e->Ety):
// 4 bytes for the float family, 8 for double, 10 for long double.
// Unsupported types hit assert(0).
unsigned char loadconst(elem *e, int im)
#if __DMC__
__in
{
    elem_debug(e);
    assert(im == 0 || im == 1);
}
__body
#endif
{
    static float fval[7] =
	{0.0,1.0,PI,LOG2T,LOG2E,LOG2,LN2};
    static double dval[7] =
	{0.0,1.0,PI,LOG2T,LOG2E,LOG2,LN2};
    static long double ldval[7] =
#if __APPLE__
#define	M_PIl		0x1.921fb54442d1846ap+1L	// 3.14159 fldpi
#define	M_LOG2T_L	0x1.a934f0979a3715fcp+1L	// 3.32193 fldl2t
#define	M_LOG2El	0x1.71547652b82fe178p+0L	// 1.4427 fldl2e
#define	M_LOG2_L	0x1.34413509f79fef32p-2L	// 0.30103 fldlg2
#define	M_LN2l		0x1.62e42fefa39ef358p-1L	// 0.693147 fldln2
	{0.0,1.0,M_PIl,M_LOG2T_L,M_LOG2El,M_LOG2_L,M_LN2l};
#elif __GNUC__
	// BUG: should get proper 80 bit values for these
	#define M_LOG2T_L	LOG2T
	#define M_LOG2_L	LOG2
	{0.0,1.0,M_PIl,M_LOG2T_L,M_LOG2El,M_LOG2_L,M_LN2l};
#else
	//@{0.0,1.0,M_PI_L,M_LOG2T_L,M_LOG2E_L,M_LOG2_L,M_LN2_L};
	{0.0,1.0,PI,LOG2T,LOG2E,LOG2,LN2};
#endif
    // The opcode bytes are all above 0x7F, so the table has to be unsigned:
    // brace-initializing a plain (possibly signed) char with them is a
    // narrowing conversion. The last entry (0) is what the search loop below
    // lands on when nothing matched.
    static const unsigned char opcode[7 + 1] =
	/* FLDZ,FLD1,FLDPI,FLDL2T,FLDL2E,FLDLG2,FLDLN2,0 */
	{0xEE,0xE8,0xEB,0xE9,0xEA,0xEC,0xED,0};
    int i;
    targ_float f;
    targ_double d;
    targ_ldouble ld;
    int sz;
    int zero;
    void *p;
    // NOTE(review): up to 10 bytes of zeros and of ld are compared below;
    // this assumes both long double and targ_ldouble are at least 10 bytes
    // on the host -- confirm for hosts with an 8 byte long double.
    static char zeros[sizeof(long double)];

    // Copy the requested part of the constant into f, d or ld and remember
    // its width in sz and its address in p.
    if (im == 0)
    {
	switch (tybasic(e->Ety))
	{
	    case TYfloat:
	    case TYifloat:
	    case TYcfloat:
		f = e->EV.Vfloat;
		sz = 4;
		p = &f;
		break;

	    case TYdouble:
	    case TYdouble_alias:
	    case TYidouble:
	    case TYcdouble:
		d = e->EV.Vdouble;
		sz = 8;
		p = &d;
		break;

	    case TYldouble:
	    case TYildouble:
	    case TYcldouble:
		ld = e->EV.Vldouble;
		sz = 10;
		p = &ld;
		break;

	    default:
		assert(0);
	}
    }
    else
    {
	// Only complex constants have an imaginary part to load
	switch (tybasic(e->Ety))
	{
	    case TYcfloat:
		f = e->EV.Vcfloat.im;
		sz = 4;
		p = &f;
		break;

	    case TYcdouble:
		d = e->EV.Vcdouble.im;
		sz = 8;
		p = &d;
		break;

	    case TYcldouble:
		ld = e->EV.Vcldouble.im;
		sz = 10;
		p = &ld;
		break;

	    default:
		assert(0);
	}
    }

    // Note that for this purpose, -0 is not regarded as +0,
    // since FLDZ loads a +0
    zero = (memcmp(p, zeros, sz) == 0);
    if (zero && config.target_cpu >= TARGET_PentiumPro)
	return 0xEE;		// FLDZ is the only one with 1 micro-op

    // For some reason, these instructions take more clocks
    if (config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
	return 0;

    if (zero)
	return 0xEE;

    // Compare against the remaining table entries (index 0, the zero, was
    // handled bitwise above). If none matches, i ends at 7 and the 0
    // terminator of opcode[] is returned.
    for (i = 1; i < arraysize(fval); i++)
    {
	switch (sz)
	{
	    case 4:
		if (fval[i] != f)
		    continue;
		break;
	    case 8:
		if (dval[i] != d)
		    continue;
		break;
	    case 10:
		if (ldval[i] != ld)
		    continue;
		break;
	    default:
		assert(0);
	}
	break;
    }
    return opcode[i];
}

/******************************
 * Given the result of an expression is in retregs,
 * generate necessary code to return result in *pretregs.
 */


// Generates the code that moves a result from where it is (retregs) to
// where the caller wants it (*pretregs), for results that involve the 8087.
//	e		elem whose type decides the operand size
//	retregs		mask describing where the result currently is
//	pretregs	mask describing where the result is wanted
// Returns the generated code list. A request for mST01 is handed to
// fixresult_complex87() unchanged.
code *fixresult87(elem *e,regm_t retregs,regm_t *pretregs)
{
    regm_t regm;
    tym_t tym;
    code *c1,*c2;
    unsigned sz;

    //printf("fixresult87(e = %p, retregs = x%x, *pretregs = x%x)\n", e,retregs,*pretregs);
    //printf("fixresult87(e = %p, retregs = %s, *pretregs = %s)\n", e,regm_str(retregs),regm_str(*pretregs));
    assert(!*pretregs || retregs);	// a wanted result has to be somewhere
    c1 = CNIL;
    c2 = CNIL;
    tym = tybasic(e->Ety);
    sz = tysize[tym];
    //printf("tym = x%x, sz = %d\n", tym, sz);

    if (*pretregs & mST01)
	return fixresult_complex87(e, retregs, pretregs);

    /* if retregs needs to be transferred into the 8087	*/
    if (*pretregs & mST0 && retregs & (mBP | ALLREGS))
    {
	assert(sz <= DOUBLESIZE);
	if (I32)
	{
	    // Go through memory: store the CPU register(s) into floatreg
	    // and load the 8087 from there.

	    if (*pretregs & mPSW)
	    {	// Set flags
		regm_t r;

		r = retregs | mPSW;
		c1 = fixresult(e,retregs,&r);
	    }
	    c2 = push87();
	    if (sz == REGSIZE)
	    {	unsigned reg;

		reg = findreg(retregs);
		c2 = genfltreg(c2,0x89,reg,0);		// MOV fltreg,reg
		genfltreg(c2,0xD9,0,0);			// FLD float ptr fltreg
	    }
	    else
	    {	unsigned msreg,lsreg;

		msreg = findregmsw(retregs);
		lsreg = findreglsw(retregs);
		c2 = genfltreg(c2,0x89,lsreg,0);	// MOV fltreg,lsreg
		genfltreg(c2,0x89,msreg,4);		// MOV fltreg+4,msreg
		genfltreg(c2,0xDD,0,0);			// FLD double ptr fltreg
	    }
	}
	else
	{
	    // Not I32: put the value in FLOATREGS / DOUBLEREGS and let a
	    // library routine move it onto the 8087.
	    regm = (sz == FLOATSIZE) ? FLOATREGS : DOUBLEREGS;
	    regm |= *pretregs & mPSW;
	    c1 = fixresult(e,retregs,&regm);
	    regm = 0;		// don't worry about result from CLIBxxx
	    c2 = callclib(e,
		    ((sz == FLOATSIZE) ? CLIBfltto87 : CLIBdblto87),
		    &regm,0);
	}
    }
    // Result is in ST0 but wanted in CPU registers: store-and-pop into
    // floatreg, then load the register(s) from there.
    else if (*pretregs & (mBP | ALLREGS) && retregs & mST0)
    {	unsigned mf;
	unsigned reg;

	assert(sz <= DOUBLESIZE);
	mf = (sz == FLOATSIZE) ? MFfloat : MFdouble;
	// Flags wanted but not yet set: test ST0 before it is popped
	if (*pretregs & mPSW && !(retregs & mPSW))
		c1 = genftst(c1,e,0);
	/* FSTP floatreg	*/
	pop87();
	c1 = genfltreg(c1,ESC(mf,1),3,0);
	genfwait(c1);
	c2 = allocreg(pretregs,&reg,(sz == FLOATSIZE) ? TYfloat : TYdouble);
	if (sz == FLOATSIZE)
	{
	    if (I32)
		c2 = genfltreg(c2,0x8B,reg,0);
	    else
	    {	c2 = genfltreg(c2,0x8B,reg,REGSIZE);
		genfltreg(c2,0x8B,findreglsw(*pretregs),0);
	    }
	}
	else
	{
	    if (I32)
	    {	c2 = genfltreg(c2,0x8B,reg,REGSIZE);
		genfltreg(c2,0x8B,findreglsw(*pretregs),0);
	    }
	    else
	    {	// Four register-sized loads into AX,BX,CX,DX, taken from
		// floatreg offsets 6, 4, 2 and 0 respectively
		c2 = genfltreg(c2,0x8B,AX,6);
		genfltreg(c2,0x8B,BX,4);
		genfltreg(c2,0x8B,CX,2);
		genfltreg(c2,0x8B,DX,0);
	    }
	}
    }
    // Result is in ST0 and nobody wants it: just pop it
    else if (*pretregs == 0 && retregs == mST0)
    {
	c1 = genf2(c1,0xDD,modregrm(3,3,0));	// FPOP
	pop87();
    }
    else
    {	// No transfer needed; at most the flags still have to be set.
	// ST0 is popped by the test when it is not itself wanted.
	if (*pretregs & mPSW)
	{   if (!(retregs & mPSW))
	    {	assert(retregs & mST0);
		c1 = genftst(c1,e,!(*pretregs & mST0));	// FTST
	    }
	}
	assert(!(*pretregs & mST0) || (retregs & mST0));
    }
    // If the result ends up in ST0, record e as the value held there
    if (*pretregs & mST0)
	note87(e,0,0);
    return cat2(c1,c2);
}

/********************************
 * Generate in-line 8087 code for the following operators:
 *	add
 *	min
 *	mul
 *	div
 *	cmp
 */

// Reverse the order that the op is done in.
// Indexed with op + 1 so that op == -1 (used for OPmod in orth87) maps to
// itself. Ops 0..3 are unchanged, while 4 <-> 5 and 6 <-> 7 swap; orth87
// uses 4 for FSUBP and 6 for FDIVP and treats 7 as the reverse divide.
static const char oprev[9] = { -1,0,1,2,3,5,4,7,6 };

code *orth87(elem *e,regm_t *pretregs)
{
    unsigned op;
    code *c1,*c2,*c3,*c4;
    code *cx;
    regm_t retregs;
    regm_t resregm;
    elem *e1;
    elem *e2;
    int e2oper;
    int eoper;
    unsigned sz2;
    int clib;
    int reverse = 0;

    //printf("orth87(+e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
#if 1	// we could be evaluating / for side effects only
    assert(*pretregs != 0);
#endif
    retregs = mST0;
    resregm = mST0;

    e1 = e->E1;
    e2 = e->E2;
    c3 = CNIL;
    c4 = CNIL;
    sz2 = tysize(e1->Ety);
    if (tycomplex(e1->Ety))
	sz2 /= 2;

    eoper = e->Eoper;
    if (OTrel(eoper))
	eoper = OPeqeq;
    #define X(op, ty1, ty2)	(((op) << 16) + (ty1) * 256 + (ty2))
    switch (X(eoper, tybasic(e1->Ety), tybasic(e2->Ety)))
    {
	case X(OPadd, TYfloat, TYfloat):
	case X(OPadd, TYdouble, TYdouble):
	case X(OPadd, TYdouble_alias, TYdouble_alias):
	case X(OPadd, TYldouble, TYldouble):
	case X(OPadd, TYifloat, TYifloat):
	case X(OPadd, TYidouble, TYidouble):
	case X(OPadd, TYildouble, TYildouble):
	    op = 0;				// FADDP
	    break;

	case X(OPmin, TYfloat, TYfloat):
	case X(OPmin, TYdouble, TYdouble):
	case X(OPmin, TYdouble_alias, TYdouble_alias):
	case X(OPmin, TYldouble, TYldouble):
	case X(OPmin, TYifloat, TYifloat):
	case X(OPmin, TYidouble, TYidouble):
	case X(OPmin, TYildouble, TYildouble):
	    op = 4;				// FSUBP
	    break;

	case X(OPmul, TYfloat, TYfloat):
	case X(OPmul, TYdouble, TYdouble):
	case X(OPmul, TYdouble_alias, TYdouble_alias):
	case X(OPmul, TYldouble, TYldouble):
	case X(OPmul, TYifloat, TYifloat):
	case X(OPmul, TYidouble, TYidouble):
	case X(OPmul, TYildouble, TYildouble):
	case X(OPmul, TYfloat, TYifloat):
	case X(OPmul, TYdouble, TYidouble):
	case X(OPmul, TYldouble, TYildouble):
	case X(OPmul, TYifloat, TYfloat):
	case X(OPmul, TYidouble, TYdouble):
	case X(OPmul, TYildouble, TYldouble):
	    op = 1;				// FMULP
	    break;

	case X(OPdiv, TYfloat, TYfloat):
	case X(OPdiv, TYdouble, TYdouble):
	case X(OPdiv, TYdouble_alias, TYdouble_alias):
	case X(OPdiv, TYldouble, TYldouble):
	case X(OPdiv, TYifloat, TYifloat):
	case X(OPdiv, TYidouble, TYidouble):
	case X(OPdiv, TYildouble, TYildouble):
	    op = 6;				// FDIVP
	    break;

	case X(OPmod, TYfloat, TYfloat):
	case X(OPmod, TYdouble, TYdouble):
	case X(OPmod, TYdouble_alias, TYdouble_alias):
	case X(OPmod, TYldouble, TYldouble):
	case X(OPmod, TYfloat, TYifloat):
	case X(OPmod, TYdouble, TYidouble):
	case X(OPmod, TYldouble, TYildouble):
	case X(OPmod, TYifloat, TYifloat):
	case X(OPmod, TYidouble, TYidouble):
	case X(OPmod, TYildouble, TYildouble):
	case X(OPmod, TYifloat, TYfloat):
	case X(OPmod, TYidouble, TYdouble):
	case X(OPmod, TYildouble, TYldouble):
	    op = (unsigned) -1;
	    break;

	case X(OPeqeq, TYfloat, TYfloat):
	case X(OPeqeq, TYdouble, TYdouble):
	case X(OPeqeq, TYdouble_alias, TYdouble_alias):
	case X(OPeqeq, TYldouble, TYldouble):
	case X(OPeqeq, TYifloat, TYifloat):
	case X(OPeqeq, TYidouble, TYidouble):
	case X(OPeqeq, TYildouble, TYildouble):
	    assert(OTrel(e->Eoper));
	    assert((*pretregs & mST0) == 0);
	    c1 = codelem(e1,&retregs,FALSE);
	    note87(e1,0,0);
	    resregm = mPSW;

	    if (rel_exception(e->Eoper) || config.flags4 & CFG4fastfloat)
	    {
		if (cnst(e2) && !boolres(e2))
		{
		    c1 = genf2(c1,0xD9,0xE4);		// FTST
		    c1 = cg87_87topsw(c1);
		    c2 = genf2(NULL,0xDD,modregrm(3,3,0));	// FPOP
		    pop87();
		}
		else
		{
		    c2 = load87(e2, 0, pretregs, e1, 3);	// FCOMPP
		}
	    }
	    else
	    {
		if (cnst(e2) && !boolres(e2) &&
		    config.target_cpu < TARGET_80386)
		{
		    regm_t regm = 0;

		    c2 = callclib(e,CLIBftest0,&regm,0);
		    pop87();
		}
		else
		{
		    note87(e1,0,0);
		    c2 = load87(e2,0,&retregs,e1,-1);
		    c2 = cat2(c2,makesure87(e1,0,1,0));
		    resregm = 0;
		    if (config.target_cpu >= TARGET_80386)
		    {   c3 = gen2(CNIL,0xDA,0xE9);	// FUCOMPP
			c3 = cg87_87topsw(c3);
			pop87();
			pop87();
		    }
		    else
			// Call a function instead so that exceptions
			// are not generated.
			c3 = callclib(e,CLIBfcompp,&resregm,0);
		}
	    }

	    freenode(e2);
	    return cat4(c1,c2,c3,c4);

	case X(OPadd, TYcfloat, TYcfloat):
	case X(OPadd, TYcdouble, TYcdouble):
	case X(OPadd, TYcldouble, TYcldouble):
	case X(OPadd, TYcfloat, TYfloat):
	case X(OPadd, TYcdouble, TYdouble):
	case X(OPadd, TYcldouble, TYldouble):
	case X(OPadd, TYfloat, TYcfloat):
	case X(OPadd, TYdouble, TYcdouble):
	case X(OPadd, TYldouble, TYcldouble):
	    goto Lcomplex;

	case X(OPadd, TYifloat, TYcfloat):
	case X(OPadd, TYidouble, TYcdouble):
	case X(OPadd, TYildouble, TYcldouble):
	    goto Lcomplex2;

	case X(OPmin, TYcfloat, TYcfloat):
	case X(OPmin, TYcdouble, TYcdouble):
	case X(OPmin, TYcldouble, TYcldouble):
	case X(OPmin, TYcfloat, TYfloat):
	case X(OPmin, TYcdouble, TYdouble):
	case X(OPmin, TYcldouble, TYldouble):
	case X(OPmin, TYfloat, TYcfloat):
	case X(OPmin, TYdouble, TYcdouble):
	case X(OPmin, TYldouble, TYcldouble):
	    goto Lcomplex;

	case X(OPmin, TYifloat, TYcfloat):
	case X(OPmin, TYidouble, TYcdouble):
	case X(OPmin, TYildouble, TYcldouble):
	    goto Lcomplex2;

	case X(OPmul, TYcfloat, TYcfloat):
	case X(OPmul, TYcdouble, TYcdouble):
	case X(OPmul, TYcldouble, TYcldouble):
	    clib = CLIBcmul;
	    goto Lcomplex;

	case X(OPdiv, TYcfloat, TYcfloat):
	case X(OPdiv, TYcdouble, TYcdouble):
	case X(OPdiv, TYcldouble, TYcldouble):
	case X(OPdiv, TYfloat, TYcfloat):
	case X(OPdiv, TYdouble, TYcdouble):
	case X(OPdiv, TYldouble, TYcldouble):
	case X(OPdiv, TYifloat, TYcfloat):
	case X(OPdiv, TYidouble, TYcdouble):
	case X(OPdiv, TYildouble, TYcldouble):
	    clib = CLIBcdiv;
	    goto Lcomplex;

	case X(OPdiv, TYifloat,   TYfloat):
	case X(OPdiv, TYidouble,  TYdouble):
	case X(OPdiv, TYildouble, TYldouble):
	    op = 6;				// FDIVP
	    break;

	Lcomplex:
	    c1 = loadComplex(e1);
	    c2 = loadComplex(e2);
	    c3 = makesure87(e1, sz2, 2, 0);
	    c3 = cat2(c3,makesure87(e1, 0, 3, 0));
	    retregs = mST01;
	    if (eoper == OPadd)
	    {
		c4 = genf2(NULL, 0xDE, 0xC0+2);	// FADDP ST(2),ST
		genf2(c4, 0xDE, 0xC0+2);	// FADDP ST(2),ST
		pop87();
		pop87();
	    }
	    else if (eoper == OPmin)
	    {
		c4 = genf2(NULL, 0xDE, 0xE8+2);	// FSUBP ST(2),ST
		genf2(c4, 0xDE, 0xE8+2);	// FSUBP ST(2),ST
		pop87();
		pop87();
	    }
	    else
		c4 = callclib(e, clib, &retregs, 0);
	    c4 = cat2(c4, fixresult_complex87(e, retregs, pretregs));
	    return cat4(c1,c2,c3,c4);

	Lcomplex2:
	    retregs = mST0;
	    c1 = codelem(e1, &retregs, FALSE);
	    note87(e1, 0, 0);
	    c2 = loadComplex(e2);
	    c3 = makesure87(e1, 0, 2, 0);
	    retregs = mST01;
	    if (eoper == OPadd)
	    {
		c4 = genf2(NULL, 0xDE, 0xC0+2);	// FADDP ST(2),ST
	    }
	    else if (eoper == OPmin)
	    {
		c4 = genf2(NULL, 0xDE, 0xE8+2);	// FSUBP ST(2),ST
		c4 = genf2(c4, 0xD9, 0xE0);	// FCHS
	    }
	    else
		assert(0);
	    pop87();
	    c4 = genf2(c4, 0xD9, 0xC8 + 1);	// FXCH ST(1)
	    c4 = cat2(c4, fixresult_complex87(e, retregs, pretregs));
	    return cat4(c1,c2,c3,c4);

	case X(OPeqeq, TYcfloat, TYcfloat):
	case X(OPeqeq, TYcdouble, TYcdouble):
	case X(OPeqeq, TYcldouble, TYcldouble):
	case X(OPeqeq, TYcfloat, TYifloat):
	case X(OPeqeq, TYcdouble, TYidouble):
	case X(OPeqeq, TYcldouble, TYildouble):
	case X(OPeqeq, TYcfloat, TYfloat):
	case X(OPeqeq, TYcdouble, TYdouble):
	case X(OPeqeq, TYcldouble, TYldouble):
	case X(OPeqeq, TYifloat, TYcfloat):
	case X(OPeqeq, TYidouble, TYcdouble):
	case X(OPeqeq, TYildouble, TYcldouble):
	case X(OPeqeq, TYfloat, TYcfloat):
	case X(OPeqeq, TYdouble, TYcdouble):
	case X(OPeqeq, TYldouble, TYcldouble):
	case X(OPeqeq, TYfloat, TYifloat):
	case X(OPeqeq, TYdouble, TYidouble):
	case X(OPeqeq, TYldouble, TYildouble):
	case X(OPeqeq, TYifloat, TYfloat):
	case X(OPeqeq, TYidouble, TYdouble):
	case X(OPeqeq, TYildouble, TYldouble):
	    c1 = loadComplex(e1);
	    c2 = loadComplex(e2);
	    c3 = makesure87(e1, sz2, 2, 0);
	    c3 = cat2(c3,makesure87(e1, 0, 3, 0));
	    retregs = 0;
	    c4 = callclib(e, CLIBccmp, &retregs, 0);
	    return cat4(c1,c2,c3,c4);


	case X(OPadd, TYfloat, TYifloat):
	case X(OPadd, TYdouble, TYidouble):
	case X(OPadd, TYldouble, TYildouble):
	case X(OPadd, TYifloat, TYfloat):
	case X(OPadd, TYidouble, TYdouble):
	case X(OPadd, TYildouble, TYldouble):

	case X(OPmin, TYfloat, TYifloat):
	case X(OPmin, TYdouble, TYidouble):
	case X(OPmin, TYldouble, TYildouble):
	case X(OPmin, TYifloat, TYfloat):
	case X(OPmin, TYidouble, TYdouble):
	case X(OPmin, TYildouble, TYldouble):
	    retregs = mST0;
	    c1 = codelem(e1, &retregs, FALSE);
	    note87(e1, 0, 0);
	    c2 = codelem(e2, &retregs, FALSE);
	    c3 = makesure87(e1, 0, 1, 0);
	    if (eoper == OPmin)
		c3 = genf2(c3, 0xD9, 0xE0);	// FCHS
	    if (tyimaginary(e1->Ety))
		c3 = genf2(c3, 0xD9, 0xC8 + 1);	// FXCH ST(1)
	    retregs = mST01;
	    c4 = fixresult_complex87(e, retregs, pretregs);
	    return cat4(c1,c2,c3,c4);

	case X(OPadd, TYcfloat, TYifloat):
	case X(OPadd, TYcdouble, TYidouble):
	case X(OPadd, TYcldouble, TYildouble):
	    op = 0;
	    goto Lci;

	case X(OPmin, TYcfloat, TYifloat):
	case X(OPmin, TYcdouble, TYidouble):
	case X(OPmin, TYcldouble, TYildouble):
	    op = 4;
	    goto Lci;

	Lci:
	    c1 = loadComplex(e1);
	    retregs = mST0;
	    c2 = load87(e2,sz2,&retregs,e1,op);
	    freenode(e2);
	    retregs = mST01;
	    c3 = makesure87(e1,0,1,0);
	    c4 = fixresult_complex87(e, retregs, pretregs);
	    return cat4(c1,c2,c3,c4);

	case X(OPmul, TYcfloat, TYfloat):
	case X(OPmul, TYcdouble, TYdouble):
	case X(OPmul, TYcldouble, TYldouble):
	    c1 = loadComplex(e1);
	    goto Lcm1;

	case X(OPmul, TYcfloat, TYifloat):
	case X(OPmul, TYcdouble, TYidouble):
	case X(OPmul, TYcldouble, TYildouble):
	    c1 = loadComplex(e1);
	    c1 = genf2(c1, 0xD9, 0xE0);		// FCHS
	    genf2(c1,0xD9,0xC8 + 1);		// FXCH ST(1)
	    if (elemisone(e2))
	    {
		freenode(e2);
		c2 = NULL;
		c3 = NULL;
		goto Lcd4;
	    }
	    goto Lcm1;

	Lcm1:
	    retregs = mST0;
	    c2 = codelem(e2, &retregs, FALSE);
	    c3 = makesure87(e1, sz2, 1, 0);
	    c3 = cat2(c3,makesure87(e1, 0, 2, 0));
	    goto Lcm2;

	case X(OPmul, TYfloat, TYcfloat):
	case X(OPmul, TYdouble, TYcdouble):
	case X(OPmul, TYldouble, TYcldouble):
	    retregs = mST0;
	    c1 = codelem(e1, &retregs, FALSE);
	    note87(e1, 0, 0);
	    c2 = loadComplex(e2);
	    c3 = makesure87(e1, 0, 2, 0);
	    c3 = genf2(c3,0xD9,0xC8 + 1);	// FXCH ST(1)
	    genf2(c3,0xD9,0xC8 + 2);		// FXCH ST(2)
	    goto Lcm2;

	case X(OPmul, TYifloat, TYcfloat):
	case X(OPmul, TYidouble, TYcdouble):
	case X(OPmul, TYildouble, TYcldouble):
	    retregs = mST0;
	    c1 = codelem(e1, &retregs, FALSE);
	    note87(e1, 0, 0);
	    c2 = loadComplex(e2);
	    c3 = makesure87(e1, 0, 2, 0);
	    c3 = genf2(c3, 0xD9, 0xE0);		// FCHS
	    genf2(c3,0xD9,0xC8 + 2);		// FXCH ST(2)
	    goto Lcm2;

	Lcm2:
	    c3 = genf2(c3,0xDC,0xC8 + 2);	// FMUL ST(2), ST
	    genf2(c3,0xDE,0xC8 + 1);		// FMULP ST(1), ST
	    goto Lcd3;

	case X(OPdiv, TYcfloat, TYfloat):
	case X(OPdiv, TYcdouble, TYdouble):
	case X(OPdiv, TYcldouble, TYldouble):
	    c1 = loadComplex(e1);
	    retregs = mST0;
	    c2 = codelem(e2, &retregs, FALSE);
	    c3 = makesure87(e1, sz2, 1, 0);
	    c3 = cat2(c3,makesure87(e1, 0, 2, 0));
	    goto Lcd1;

	case X(OPdiv, TYcfloat, TYifloat):
	case X(OPdiv, TYcdouble, TYidouble):
	case X(OPdiv, TYcldouble, TYildouble):
	    c1 = loadComplex(e1);
	    c1 = genf2(c1,0xD9,0xC8 + 1);	// FXCH ST(1)
	    xchg87(0, 1);
	    genf2(c1, 0xD9, 0xE0);		// FCHS
	    retregs = mST0;
	    c2 = codelem(e2, &retregs, FALSE);
	    c3 = makesure87(e1, 0, 1, 0);
	    c3 = cat2(c3,makesure87(e1, sz2, 2, 0));
	Lcd1:
	    c3 = genf2(c3,0xDC,0xF8 + 2);	// FDIV ST(2), ST
	    genf2(c3,0xDE,0xF8 + 1);		// FDIVP ST(1), ST
	Lcd3:
	    pop87();
	Lcd4:
	    retregs = mST01;
	    c4 = fixresult_complex87(e, retregs, pretregs);
	    return cat4(c1, c2, c3, c4);

	case X(OPmod, TYcfloat, TYfloat):
	case X(OPmod, TYcdouble, TYdouble):
	case X(OPmod, TYcldouble, TYldouble):
	case X(OPmod, TYcfloat, TYifloat):
	case X(OPmod, TYcdouble, TYidouble):
	case X(OPmod, TYcldouble, TYildouble):
	    /*
			fld	E1.re
			fld	E1.im
			fld	E2
			fxch	ST(1)
		FM1:	fprem
			fstsw	word ptr sw
			fwait
			mov	AH, byte ptr sw+1
			jp	FM1
			fxch	ST(2)
		FM2:	fprem
			fstsw	word ptr sw
			fwait
			mov	AH, byte ptr sw+1
			jp	FM2
			fstp	ST(1)
			fxch	ST(1)
	     */
	    c1 = loadComplex(e1);
	    retregs = mST0;
	    c2 = codelem(e2, &retregs, FALSE);
	    c3 = makesure87(e1, sz2, 1, 0);
	    c3 = cat2(c3,makesure87(e1, 0, 2, 0));
	    c3 = genf2(c3, 0xD9, 0xC8 + 1);		// FXCH ST(1)

	    cx = gen2(NULL, 0xD9, 0xF8);		// FPREM
	    cx = cg87_87topsw(cx);
	    cx = genjmp(cx, JP, FLcode, (block *)cx);	// JP FM1
	    cx = genf2(cx, 0xD9, 0xC8 + 2);		// FXCH ST(2)
	    c3 = cat2(c3,cx);

	    cx = gen2(NULL, 0xD9, 0xF8);		// FPREM
	    cx = cg87_87topsw(cx);
	    cx = genjmp(cx, JP, FLcode, (block *)cx);	// JP FM2
	    cx = genf2(cx,0xDD,0xD8 + 1);		// FSTP ST(1)
	    cx = genf2(cx, 0xD9, 0xC8 + 1);		// FXCH ST(1)
	    c3 = cat2(c3,cx);

	    goto Lcd3;

	default:
#ifdef DEBUG
	    elem_print(e);
#endif
	    assert(0);
	    break;
    }
    #undef X

    e2oper = e2->Eoper;
    if (e1->Eoper == OPconst ||
	(e1->Eoper == OPvar &&
	 ((e1->Ety & (mTYconst | mTYimmutable) && !OTleaf(e2oper)) ||
	  (e2oper == OPd_f &&
	    (e2->E1->Eoper == OPs32_d || e2->E1->Eoper == OPs64_d || e2->E1->Eoper == OPs16_d) &&
	    e2->E1->E1->Eoper == OPvar
	  ) ||
	  ((e2oper == OPs32_d || e2oper == OPs64_d || e2oper == OPs16_d) &&
	    e2->E1->Eoper == OPvar
	  )
	 )
	)
       )
    {   // Reverse order of evaluation
	e1 = e->E2;
	e2 = e->E1;
	op = oprev[op + 1];
	reverse ^= 1;
    }

    c1 = codelem(e1,&retregs,FALSE);
    note87(e1,0,0);

    if (config.flags4 & CFG4fdivcall && e->Eoper == OPdiv)
    {   regm_t retregs;

	retregs = mST0;
	c2 = load87(e2,0,&retregs,e1,-1);
	c2 = cat2(c2,makesure87(e1,0,1,0));
	if (op == 7)			// if reverse divide
	    c2 = genf2(c2,0xD9,0xC8 + 1);	// FXCH ST(1)
	c2 = cat2(c2,callclib(e,CLIBfdiv87,&retregs,0));
	pop87();
	resregm = mST0;
	freenode(e2);
	c4 = fixresult87(e,resregm,pretregs);
    }
    else if (e->Eoper == OPmod)
    {
	/*
	 *		fld	tbyte ptr y
	 *		fld	tbyte ptr x		// ST = x, ST1 = y
	 *	FM1:	// We don't use fprem1 because for some inexplicable
	 *		// reason we get -5 when we do _modulo(15, 10)
	 *		fprem				// ST = ST % ST1
	 *		fstsw	word ptr sw
	 *		fwait
	 *		mov	AH,byte ptr sw+1	// get msb of status word in AH
	 *		sahf				// transfer to flags
	 *		jp	FM1			// continue till ST < ST1
	 *		fstp	ST(1)			// leave remainder on stack
	 */
	regm_t retregs;

	retregs = mST0;
	c2 = load87(e2,0,&retregs,e1,-1);
	c2 = cat2(c2,makesure87(e1,0,1,0));	// now have x,y on stack; need y,x
	if (!reverse)				// if not reverse modulo
	    c2 = genf2(c2,0xD9,0xC8 + 1);	// FXCH ST(1)

	c3 = gen2(NULL, 0xD9, 0xF8);		// FM1: FPREM
	c3 = cg87_87topsw(c3);
	c3 = genjmp(c3, JP, FLcode, (block *)c3); // JP FM1
	c3 = genf2(c3,0xDD,0xD8 + 1);		// FSTP ST(1)

	pop87();
	resregm = mST0;
	freenode(e2);
	c4 = fixresult87(e,resregm,pretregs);
    }
    else
    {   c2 = load87(e2,0,pretregs,e1,op);
	freenode(e2);
    }
    if (*pretregs & mST0)
	note87(e,0,0);
    //printf("orth87(-e = %p, *pretregs = x%x)\n", e, *pretregs);
    return cat4(c1,c2,c3,c4);
}

/*****************************
 * Generate code that leaves e on the 8087 stack as a two part value
 * in ST01.
 *
 * A real operand is evaluated first and a zero is pushed on top of it;
 * an imaginary operand is evaluated on top of a previously pushed zero;
 * a complex operand is evaluated straight into ST01.
 *
 * Afterwards the two stack slots are recorded with note87(): slot 1
 * as (e, 0) and slot 0 as (e, offset), where offset is half the operand
 * size for complex operands and the full operand size otherwise.
 *
 * Returns:
 *	the generated code
 */

code *loadComplex(elem *e)
{
    const tym_t ty = tybasic(e->Ety);
    int top_offset = tysize(e->Ety);	// offset used to tag the ST0 slot
    regm_t want;
    code *cg;

    if (ty == TYfloat || ty == TYdouble || ty == TYldouble)
    {
	// Real operand: evaluate it, then push 0 above it
	want = mST0;
	cg = codelem(e,&want,FALSE);
	cg = cat2(cg, push87());
	cg = gen2(cg,0xD9,0xEE);		// FLDZ
    }
    else if (ty == TYifloat || ty == TYidouble || ty == TYildouble)
    {
	// Imaginary operand: push 0 first, then evaluate on top of it
	cg = push87();
	cg = gen2(cg,0xD9,0xEE);		// FLDZ
	want = mST0;
	cg = cat2(cg, codelem(e,&want,FALSE));
    }
    else if (ty == TYcfloat || ty == TYcdouble || ty == TYcldouble)
    {
	// Complex operand: each half is half the total size
	top_offset /= 2;
	want = mST01;
	cg = codelem(e,&want,FALSE);
    }
    else
    {
	assert(0);
    }

    note87(e, 0, 1);
    note87(e, top_offset, 0);
    return cg;
}

/*************************
 * Generate 8087 code for operand e.
 * If op == -1, load expression e into ST0.
 * else compute (eleft op e), eleft is in ST0.
 * Must follow same logic as cmporder87();
 * Params:
 *	e = operand to load, or right operand of the operation
 *	eoffset = offset handed, together with eleft, to note87()/makesure87()
 *	pretregs = where the caller wants the result; passed to fixresult87()
 *	eleft = left operand; only referenced when op != -1
 *	op = -1 for a plain load, otherwise the operation selector that is
 *	     placed in the reg field of the generated ESC instruction.
 *	     op == 3 is treated as FCOMP and produces mPSW instead of mST0.
 * Returns:
 *	the comma code, any push87() code, the operand code and the
 *	fixresult87() code, concatenated in that order
 */

code *load87(elem *e,unsigned eoffset,regm_t *pretregs,elem *eleft,int op)
{
	elem *e1;
	code *ccomma,*c,*c2,*cpush;
	code cs;
	regm_t retregs;
	unsigned reg,mf,mf1;
	int opr;
	unsigned char ldop;
	tym_t ty;
	int i;

#if NDPP
	printf("+load87(e=%p, eoffset=%d, *pretregs=x%x, eleft=%p, op=%d, stackused = %d)\n",e,eoffset,*pretregs,eleft,op,stackused);
#endif
	elem_debug(e);
	ccomma = NULL;
	cpush = NULL;
	if (ADDFWAIT())
	    cs.Iflags = CFwait;
	else
	    cs.Iflags = 0;
	cs.Ijty = 0;
	// opr is the operand-reversed counterpart of op; it is used whenever
	// e ends up in ST0 and eleft in ST1 (the "FopRP" forms below)
	opr = oprev[op + 1];
	ty = tybasic(e->Ety);
	// 80 bit operands are not used as memory operands of an arithmetic
	// instruction here; they go through the general path (unless e is an
	// OPd_ld conversion, which has its own case)
	if ((ty == TYldouble || ty == TYildouble) &&
	    op != -1 && e->Eoper != OPd_ld)
	    goto Ldefault;
	// Memory format of e: anything that is not a float type is treated as double
	mf = (ty == TYfloat || ty == TYifloat || ty == TYcfloat) ? MFfloat : MFdouble;
    L5:
	switch (e->Eoper)
	{
	    case OPcomma:
		// Generate the left sides of the commas, then dispatch again
		// on whatever docommas() leaves in e
		ccomma = docommas(&e);
//		if (op != -1)
//		    ccomma = cat2(ccomma,makesure87(eleft,eoffset,0,0));
		goto L5;

	    case OPvar:
		notreg(e);
		// fall through
	    case OPind:
	    L2:
		if (op != -1)
		{
		    // Use a common subexpression that cse_get() finds on the
		    // 8087 stack, otherwise use e as a memory operand
		    if (e->Ecount && e->Ecount != e->Ecomsub &&
			(i = cse_get(e, 0)) >= 0)
		    {	static unsigned char b2[8] = {0xC0,0xC8,0xD0,0xD8,0xE0,0xE8,0xF0,0xF8};

			c = genf2(NULL,0xD8,b2[op] + i);	// Fop ST(i)
		    }
		    else
		    {
			c = getlvalue(&cs,e,0);
			// getlvalue() may have generated code; make sure
			// eleft is (still) in ST0 before operating on it
			c = cat2(c,makesure87(eleft,eoffset,0,0));
			cs.Iop = ESC(mf,0);
			cs.Irm |= modregrm(0,op,0);
			c = gen(c,&cs);
		    }
		}
		else
		{
		    // Plain load: reserve a stack slot, then FLD from memory
		    cpush = push87();
		    switch (ty)
		    {
			case TYfloat:
			case TYdouble:
			case TYifloat:
			case TYidouble:
			case TYcfloat:
			case TYcdouble:
			case TYdouble_alias:
			    c = loadea(e,&cs,ESC(mf,1),0,0,0,0);	// FLD var
			    break;
			case TYldouble:
			case TYildouble:
			case TYcldouble:
			    c = loadea(e,&cs,0xDB,5,0,0,0);		// FLD var
			    break;
			default:
			    // __debug printf("ty = x%x\n", ty);
			    assert(0);
			    break;
		    }
		    note87(e,0,0);
		}
		break;
	    case OPd_f:
	    case OPf_d:
	    case OPd_ld:
		// Floating point size conversions: operate directly on the
		// unconverted operand, using its own memory format mf1
		mf1 = (tybasic(e->E1->Ety) == TYfloat || tybasic(e->E1->Ety) == TYifloat)
			? MFfloat : MFdouble;
		if (op != -1 && stackused)
		    note87(eleft,eoffset,0);	// don't trash this value
		if (e->E1->Eoper == OPvar || e->E1->Eoper == OPind)
		{
#if 1
		// L4 is also the target for OPs16_d/OPs32_d of a memory
		// operand (see L6), with mf1 selecting the integer format
		L4:
		    c = getlvalue(&cs,e->E1,0);
		    cs.Iop = ESC(mf1,0);
		    if (ADDFWAIT())
			cs.Iflags |= CFwait;
		    if (I32)
			cs.Iflags &= ~CFopsize;
		    if (op != -1)
		    {	cs.Irm |= modregrm(0,op,0);
			c = cat2(c,makesure87(eleft,eoffset,0,0));
		    }
		    else
		    {	cs.Iop |= 1;
			c = cat2(c,push87());
		    }
		    c = gen(c,&cs);			/* FLD / Fop	*/
#else
		    c = loadea(e->E1,&cs,ESC(mf1,1),0,0,0,0); /* FLD e->E1 */
#endif
		    /* Variable cannot be put into a register anymore	*/
		    if (e->E1->Eoper == OPvar)
			notreg(e->E1);
		    freenode(e->E1);
		}
		else
		{
		    // Evaluate the operand into ST0; eleft is then expected
		    // in ST1, so the reversed popping form is used
		    retregs = mST0;
		    c = codelem(e->E1,&retregs,FALSE);
		    if (op != -1)
		    {	c = cat2(c,makesure87(eleft,eoffset,1,0));
			c = genf2(c,0xDE,modregrm(3,opr,1)); // FopRP
			pop87();
		    }
		}
		break;

	    case OPs64_d:
		// 64 bit integers are always loaded with FILD m64 first
		// and then combined with eleft on the stack
		if (e->E1->Eoper == OPvar ||
		    (e->E1->Eoper == OPind && e->E1->Ecount == 0))
		{
		    c = getlvalue(&cs,e->E1,0);
		    cs.Iop = 0xDF;
		    if (ADDFWAIT())
			cs.Iflags |= CFwait;
		    if (I32)
			cs.Iflags &= ~CFopsize;
		    c = cat2(c,push87());
		    cs.Irm |= modregrm(0,5,0);
		    c = gen(c,&cs);			// FILD m64
		    // Variable cannot be put into a register anymore
		    if (e->E1->Eoper == OPvar)
			notreg(e->E1);
		    freenode(e->E1);
		}
		else
		{
		    // Value is in a register pair: spill both halves to
		    // floatreg and load it from there
		    retregs = ALLREGS;
		    c = codelem(e->E1,&retregs,FALSE);
		    reg = findreglsw(retregs);
		    c = genfltreg(c,0x89,reg,0);	// MOV floatreg,reglsw
		    reg = findregmsw(retregs);
		    c = genfltreg(c,0x89,reg,4);	// MOV floatreg+4,regmsw
		    c = cat2(c,push87());
		    c = genfltreg(c,0xDF,5,0);		// FILD long long ptr floatreg
		}
		if (op != -1)
		{   c = cat2(c,makesure87(eleft,eoffset,1,0));
		    c = genf2(c,0xDE,modregrm(3,opr,1)); // FopRP
		    pop87();
		}
		break;

	    case OPconst:
		// Only constants for which loadconst() returns a load
		// opcode are handled here
		ldop = loadconst(e, 0);
		if (ldop)
		{
		    cpush = push87();
		    c = genf2(NULL,0xD9,ldop);		// FLDx
		    if (op != -1)
		    {	genf2(c,0xDE,modregrm(3,opr,1));	// FopRP
			pop87();
		    }
		}
		else
		{
		    assert(0);
		}
		break;

	    case OPu16_d:
	    {	regm_t mswregs;

		/* This opcode should never be generated	*/
		/* (probably shouldn't be for 16 bit code too)	*/
		assert(!I32);

		if (op != -1)
		    note87(eleft,eoffset,0);	// don't trash this value
		retregs = ALLREGS & mLSW;
		c = codelem(e->E1,&retregs,FALSE);
		c = regwithvalue(c,ALLREGS & mMSW,0,&reg,0);  // 0-extend
		retregs |= mask[reg];
		mf1 = MFlong;
		goto L3;
	    }
	    case OPs16_d:	mf1 = MFword;	goto L6;
	    case OPs32_d:	mf1 = MFlong;	goto L6;
	    L6:
		if (op != -1)
		    note87(eleft,eoffset,0);	// don't trash this value
		if (e->E1->Eoper == OPvar ||
		    (e->E1->Eoper == OPind && e->E1->Ecount == 0))
		{
		    // Integer is in memory: use it directly as the operand
		    goto L4;
		}
		else
		{
		    // Integer is in register(s): spill to floatreg and
		    // use floatreg as the memory operand
		    retregs = ALLREGS;
		    c = codelem(e->E1,&retregs,FALSE);
		L3:
		    if (!I32 && e->Eoper != OPs16_d)
		    {
			/* MOV floatreg+2,reg	*/
			reg = findregmsw(retregs);
			c = genfltreg(c,0x89,reg,REGSIZE);
			retregs &= mLSW;
		    }
		    reg = findreg(retregs);
		    c = genfltreg(c,0x89,reg,0);	/* MOV floatreg,reg */
		    if (op != -1)
		    {	c = cat2(c,makesure87(eleft,eoffset,0,0));
			genfltreg(c,ESC(mf1,0),op,0);	/* Fop floatreg	*/
		    }
		    else
		    {
			/* FLD long ptr floatreg	*/
			c = cat2(c,push87());
			c = genfltreg(c,ESC(mf1,1),0,0);
		    }
		}
		break;
	    default:
	    Ldefault:
		// General case: evaluate e into ST0, then combine with
		// eleft (expected in ST1) using a popping register form
		retregs = mST0;
#if 1		/* Do this instead of codelem() to avoid the freenode(e).
		   We also lose CSE capability	*/
		if (e->Eoper == OPconst)
		{
		    c = load87(e, 0, &retregs, NULL, -1);
		}
		else
		    c = (*cdxxx[e->Eoper])(e,&retregs);
#else
		c = codelem(e,&retregs,FALSE);
#endif
		if (op != -1)
		{
		    c = cat2(c,makesure87(eleft,eoffset,1,(op == 0 || op == 1)));
		    pop87();
		    if (op == 4 || op == 6)	// sub or div
		    {	code *cl;

			// If the last instruction generated is an FXCH ST(1),
			// drop it and use the non-reversed operation instead
			cl = code_last(c);
			if (cl && cl->Iop == 0xD9 && cl->Irm == 0xC9)	// FXCH ST(1)
			{   cl->Iop = NOP;
			    opr = op;		// reverse operands
			}
		    }
		    c = genf2(c,0xDE,modregrm(3,opr,1));	// FopRP
		}
		break;
	}
	if (op == 3)			// FCOMP
	{   pop87();			// extra pop was done
	    cg87_87topsw(c);		// appended to c; return value not needed
	}
	// A compare leaves its result in the flags, everything else in ST0
	c2 = fixresult87(e,((op == 3) ? mPSW : mST0),pretregs);
#if NDPP
	printf("-load87(e=%p, eoffset=%d, *pretregs=x%x, eleft=%p, op=%d, stackused = %d)\n",e,eoffset,*pretregs,eleft,op,stackused);
#endif
	return cat4(ccomma,cpush,c,c2);
}

/********************************
 * Decide the operand order of an 8087 compare for operand e.
 * Must follow same logic as load87().
 * Returns:
 *	0 if the compare is to be done forwards,
 *	1 if it is to be done backwards
 */

int cmporder87(elem *e)
{
    int backwards;

    //printf("cmporder87(%p)\n",e);

    // Only the rightmost operand of a comma expression matters
    while (e->Eoper == OPcomma)
	e = e->E2;

    switch (e->Eoper)
    {
	case OPd_f:
	case OPf_d:
	case OPd_ld:
	    // Forwards only when the unconverted operand is in memory
	    backwards = !(e->E1->Eoper == OPvar || e->E1->Eoper == OPind);
	    break;

	case OPconst:
	    // Backwards for constants loadconst() can load, and for
	    // 80 bit constants
	    backwards = (loadconst(e, 0) ||
			 tybasic(e->Ety) == TYldouble ||
			 tybasic(e->Ety) == TYildouble) ? 1 : 0;
	    //printf("ret %d, loadconst(e) = %d\n", backwards, loadconst(e));
	    break;

	case OPvar:
	case OPind:
	    // Memory operands go forwards unless they are 80 bit
	    backwards = (tybasic(e->Ety) == TYldouble ||
			 tybasic(e->Ety) == TYildouble) ? 1 : 0;
	    break;

	case OPu16_d:
	case OPs16_d:
	case OPs32_d:
	case OPs64_d:
	    backwards = 0;
	    break;

	default:
	    backwards = 1;
	    break;
    }
    return backwards;
}

/*******************************
 * Perform an assignment to a long double/double/float.
 * The right side is evaluated into ST0 and stored to e->E1.
 * Params:
 *	e = the OPeq node
 *	pretregs = if it asks for mST0, a general register or mBP, the
 *		   value is also kept on the 8087 stack after the store
 * Returns:
 *	the generated code
 */

code *eq87(elem *e,regm_t *pretregs)
{
	regm_t retregs;
	code *c1,*c2;
	code cs;
	unsigned op1;
	unsigned op2;
	tym_t ty1;

	//printf("+eq87(e = %p, *pretregs = x%x)\n", e, *pretregs);
	assert(e->Eoper == OPeq);
	retregs = mST0 | (*pretregs & mPSW);
	c1 = codelem(e->E2,&retregs,FALSE);
	ty1 = tybasic(e->E1->Ety);
	// op1 is the store opcode, op2 its reg field (popping form by default)
	switch (ty1)
	{   case TYdouble_alias:
	    case TYidouble:
	    case TYdouble:	op1 = ESC(MFdouble,1);	op2 = 3; break;
	    case TYifloat:
	    case TYfloat:	op1 = ESC(MFfloat,1);	op2 = 3; break;
	    case TYildouble:
	    case TYldouble:	op1 = 0xDB;		op2 = 7; break;
	    default:
		assert(0);
	}
	if (*pretregs & (mST0 | ALLREGS | mBP))	// if want result on stack too
	{
	    if (ty1 == TYldouble || ty1 == TYildouble)
	    {
		// The 80 bit store keeps its popping form (op2 == 7), so
		// duplicate ST0 first and let the store pop the copy
		c1 = cat2(c1,push87());
		c1 = genf2(c1,0xD9,0xC0);	// FLD ST(0)
		pop87();
	    }
	    else
		op2 = 2;			// FST e->E1
	}
	else
	{					// FSTP e->E1
	    pop87();
	}
#if 0
	// Doesn't work if ST(0) gets saved to the stack by getlvalue()
	c2 = loadea(e->E1,&cs,op1,op2,0,0,0);
#else
	cs.Ijty = 0;
	cs.Iflags = 0;
	cs.Iop = op1;
	if (*pretregs & (mST0 | ALLREGS | mBP))	// if want result on stack too
	{   // Make sure it's still there
	    elem *e2 = e->E2;
	    while (e2->Eoper == OPcomma)
		e2 = e2->E2;
	    note87(e2,0,0);
	    c2 = getlvalue(&cs, e->E1, 0);
	    c2 = cat2(c2,makesure87(e2,0,0,1));
	}
	else
	{
	    c2 = getlvalue(&cs, e->E1, 0);
	}
	cs.Irm |= modregrm(0,op2,0);		// OR in reg field
	if (I32)
	    cs.Iflags &= ~CFopsize;
	else if (ADDFWAIT())
	    cs.Iflags |= CFwait;
	c2 = gen(c2, &cs);			// FST/FSTP e->E1
#if LNGDBLSIZE == 12
	/* This deals with the fact that 10 byte reals really
	 * occupy 12 bytes by zeroing the extra 2 bytes.
	 */
	if (op1 == 0xDB)
	{
	    cs.Iop = 0xC7;			// MOV EA+10,0
	    NEWREG(cs.Irm, 0);
	    cs.IEV1.sp.Voffset += 10;
	    cs.IFL2 = FLconst;
	    cs.IEV2.Vint = 0;
	    cs.Iflags |= CFopsize;
	    c2 = gen(c2, &cs);
	}
#endif
#if LNGDBLSIZE == 16
	/* This deals with the fact that 10 byte reals really
	 * occupy 16 bytes by zeroing the extra 6 bytes.
	 */
	if (op1 == 0xDB)
	{
	    cs.Iop = 0xC7;			// MOV EA+10,0
	    NEWREG(cs.Irm, 0);
	    cs.IEV1.sp.Voffset += 10;
	    cs.IFL2 = FLconst;
	    cs.IEV2.Vint = 0;
	    cs.Iflags |= CFopsize;
	    c2 = gen(c2, &cs);

	    // Same MOV again without the operand size prefix, at EA+12
	    cs.IEV1.sp.Voffset += 2;
	    cs.Iflags &= ~CFopsize;
	    c2 = gen(c2, &cs);
	}
#endif
#endif
	c2 = genfwait(c2);
	freenode(e->E1);
	c1 = cat3(c1,c2,fixresult87(e,mST0 | mPSW,pretregs));
	return c1;
}

/*******************************
 * Perform an assignment to a complex long double/double/float.
 * The right side is evaluated into ST01; the value in ST0 is stored
 * to the second half of e->E1 and the value in ST1 to the first half.
 * Params:
 *	e = the OPeq node
 *	pretregs = if it asks for mST01, the value is also kept on the
 *		   8087 stack after the stores
 * Returns:
 *	the generated code
 */

code *complex_eq87(elem *e,regm_t *pretregs)
{
	regm_t retregs;
	code *c1,*c2;
	code cs;
	unsigned op1;
	unsigned op2;
	unsigned sz;
	tym_t ty1;
	int fxch = 0;		// !=0: stores are non-popping, so swap ST0/ST1 around the second one

	assert(e->Eoper == OPeq);
	cs.Iflags = ADDFWAIT() ? CFwait : 0;
	cs.Ijty = 0;
	retregs = mST01 | (*pretregs & mPSW);
	c1 = codelem(e->E2,&retregs,FALSE);
	ty1 = tybasic(e->E1->Ety);
	// op1 is the store opcode, op2 its reg field (popping form by default)
	switch (ty1)
	{
	    case TYcdouble:	op1 = ESC(MFdouble,1);	op2 = 3; break;
	    case TYcfloat:	op1 = ESC(MFfloat,1);	op2 = 3; break;
	    case TYcldouble:	op1 = 0xDB;		op2 = 7; break;
	    default:
		assert(0);
	}
	if (*pretregs & mST01)	// if want result on stack too
	{
	    if (ty1 == TYcldouble)
	    {
		// The 80 bit store keeps its popping form (op2 == 7), so
		// duplicate both halves and let the stores pop the copies
		c1 = cat2(c1,push87());
		c1 = cat2(c1,push87());
		c1 = genf2(c1,0xD9,0xC0 + 1);	// FLD ST(1)
		genf2(c1,0xD9,0xC0 + 1);	// FLD ST(1)
		pop87();
		pop87();
	    }
	    else
	    {	op2 = 2;			// FST e->E1
		fxch = 1;
	    }
	}
	else
	{					// FSTP e->E1
	    pop87();
	    pop87();
	}
	sz = tysize(ty1) / 2;			// size of one half
	c2 = loadea(e->E1,&cs,op1,op2,sz,0,0);	// store ST0 at offset sz
	c2 = genfwait(c2);
	if (fxch)
	    c2 = genf2(c2,0xD9,0xC8 + 1);	// FXCH ST(1)
	cs.IEVoffset1 -= sz;
	gen(c2, &cs);				// store to offset 0
	if (fxch)
	    genf2(c2,0xD9,0xC8 + 1);		// FXCH ST(1)
#if LNGDBLSIZE == 12
	// Zero the 2 padding bytes that follow each 10 byte half
	if (op1 == 0xDB)
	{
	    cs.Iop = 0xC7;			// MOV EA+10,0
	    NEWREG(cs.Irm, 0);
	    cs.IEV1.sp.Voffset += 10;
	    cs.IFL2 = FLconst;
	    cs.IEV2.Vint = 0;
	    cs.Iflags |= CFopsize;
	    c2 = gen(c2, &cs);
	    cs.IEVoffset1 += 12;
	    c2 = gen(c2, &cs);			// MOV EA+22,0
	}
#endif
#if LNGDBLSIZE == 16
	// Zero the 6 padding bytes that follow each 10 byte half, using one
	// MOV with the operand size prefix and one without per half
	if (op1 == 0xDB)
	{
	    cs.Iop = 0xC7;			// MOV EA+10,0
	    NEWREG(cs.Irm, 0);
	    cs.IEV1.sp.Voffset += 10;
	    cs.IFL2 = FLconst;
	    cs.IEV2.Vint = 0;
	    cs.Iflags |= CFopsize;
	    c2 = gen(c2, &cs);

	    cs.IEV1.sp.Voffset += 2;		// EA+12
	    cs.Iflags &= ~CFopsize;
	    c2 = gen(c2, &cs);

	    cs.IEVoffset1 += 14;		// EA+26
	    cs.Iflags |= CFopsize;
	    c2 = gen(c2, &cs);

	    cs.IEV1.sp.Voffset += 2;		// EA+28
	    cs.Iflags &= ~CFopsize;
	    c2 = gen(c2, &cs);
	}
#endif
	c2 = genfwait(c2);
	freenode(e->E1);
	return cat3(c1,c2,fixresult_complex87(e,mST01 | mPSW,pretregs));
}

/*******************************
 * Generate code for an assignment whose right side is a floating
 * point to integer conversion, i.e. handle (e1 = (int) e2).
 * The operand of the conversion is evaluated into ST0 and stored
 * straight to e1 with an integer store, with the control word switched
 * to CW_roundto0 around the store and back to CW_roundtonearest after it.
 * No result may be requested (*pretregs must be 0).
 * Returns:
 *	the generated code
 */

code *cnvteq87(elem *e,regm_t *pretregs)
{
	elem *cvt = e->E2;		// the conversion node
	elem *dst = e->E1;		// the integer lvalue
	regm_t want;
	code *cload,*cstore;
	code cs;
	unsigned esc;			// opcode of the integer store
	unsigned regfield;		// its reg field

	assert(e->Eoper == OPeq);
	assert(!*pretregs);
	want = mST0;
	elem_debug(cvt);
	cload = codelem(cvt->E1,&want,FALSE);

	// Pick the store instruction from the kind of conversion
	if (cvt->Eoper == OPd_s16)
	{
	    esc = ESC(MFword,1);
	    regfield = 3;
	}
	else if (cvt->Eoper == OPd_s32 || cvt->Eoper == OPd_u16)
	{
	    esc = ESC(MFlong,1);
	    regfield = 3;
	}
	else if (cvt->Eoper == OPd_s64)
	{
	    esc = 0xDF;
	    regfield = 7;
	}
	else
	{
	    assert(0);
	}
	freenode(cvt);

	cload = genrnd(genfwait(cload), CW_roundto0);	// FLDCW roundto0

	pop87();				// the store pops ST0
	cs.Iflags = ADDFWAIT() ? CFwait : 0;
	if (dst->Eoper == OPvar)
	    notreg(dst);			// cannot be put in register anymore
	cstore = loadea(dst,&cs,esc,regfield,0,0,0);

	cstore = genrnd(genfwait(cstore), CW_roundtonearest);	// FLDCW roundtonearest

	freenode(dst);
	return cat2(cload,cstore);
}

/**********************************
 * Perform +=, -=, *= and /= for doubles.
 * Also handles %=, and OPpostinc/OPpostdec which are generated like
 * += and -=. Complex lvalues are forwarded to opmod_complex87() (for %=)
 * or opass_complex87() (for everything else).
 * Params:
 *	e = the op= node; e->E1 is the lvalue, e->E2 the right operand
 *	pretregs = desired result; if it includes mST0, a general register
 *		   or mBP the result is also kept on the 8087 stack
 * Returns:
 *	the generated code
 */

code *opass87(elem *e,regm_t *pretregs)
{
	regm_t retregs;
	code *cl,*cr,*c;
	code cs;
	unsigned op;
	unsigned opld;
	unsigned op1;
	unsigned op2;
	tym_t ty1;

	ty1 = tybasic(e->E1->Ety);
	// op1/op2: opcode and reg field of the popping store back to e->E1
	switch (ty1)
	{   case TYdouble_alias:
	    case TYidouble:
	    case TYdouble:	op1 = ESC(MFdouble,1);	op2 = 3; break;
	    case TYifloat:
	    case TYfloat:	op1 = ESC(MFfloat,1);	op2 = 3; break;
	    case TYildouble:
	    case TYldouble:	op1 = 0xDB;		op2 = 7; break;

	    case TYcfloat:
	    case TYcdouble:
	    case TYcldouble:
		return (e->Eoper == OPmodass)
			? opmod_complex87(e, pretregs)
			: opass_complex87(e, pretregs);

	    default:
		assert(0);
	}
	// op:   reg field (already shifted) for the memory operand form
	// opld: second byte of the popping register form, used for 80 bit
	// Neither is set for OPmodass, which does not use them.
	switch (e->Eoper)
	{   case OPpostinc:
	    case OPaddass:	op = 0 << 3;	opld = 0xC1;	break;	// FADD
	    case OPpostdec:
	    case OPminass:	op = 5 << 3;	opld = 0xE1; /*0xE9;*/	break;	// FSUBR
	    case OPmulass:	op = 1 << 3;	opld = 0xC9;	break;	// FMUL
	    case OPdivass:	op = 7 << 3;	opld = 0xF1;	break;	// FDIVR
	    case OPmodass:	break;
	    default:		assert(0);
	}
	retregs = mST0;
	cr = codelem(e->E2,&retregs,FALSE);	// evaluate rvalue
	note87(e->E2,0,0);
	cl = getlvalue(&cs,e->E1,0);
	// getlvalue() may have generated code; make sure E2 is in ST0 again
	cl = cat2(cl,makesure87(e->E2,0,0,0));
	cs.Iflags |= ADDFWAIT() ? CFwait : 0;
	if (I32)
	    cs.Iflags &= ~CFopsize;
	if (config.flags4 & CFG4fdivcall && e->Eoper == OPdivass)
	{
	    // Divide through the CLIBfdiv87 library routine:
	    // load E1, swap it below E2, call
	    c = push87();
	    cs.Iop = op1;
	    if (ty1 == TYldouble || ty1 == TYildouble)
		cs.Irm |= modregrm(0, 5, 0);	// FLD tbyte ptr ...
	    c = gen(c,&cs);
	    c = genf2(c,0xD9,0xC8 + 1);		// FXCH ST(1)
	    c = cat2(c,callclib(e,CLIBfdiv87,&retregs,0));
	    pop87();
	}
	else if (e->Eoper == OPmodass)
	{
	    /*
	     *		fld	tbyte ptr y
	     *		fld	tbyte ptr x		// ST = x, ST1 = y
	     *	FM1:	// We don't use fprem1 because for some inexplicable
	     *		// reason we get -5 when we do _modulo(15, 10)
	     *		fprem				// ST = ST % ST1
	     *		fstsw	word ptr sw
	     *		fwait
	     *		mov	AH,byte ptr sw+1	// get msb of status word in AH
	     *		sahf				// transfer to flags
	     *		jp	FM1			// continue till ST < ST1
	     *		fstp	ST(1)			// leave remainder on stack
	     */
	    code *c1;

	    c = push87();
	    cs.Iop = op1;
	    if (ty1 == TYldouble || ty1 == TYildouble)
		cs.Irm |= modregrm(0, 5, 0);	// FLD tbyte ptr ...
	    c = gen(c,&cs);			// FLD   e->E1

	    // The loop is built as its own list so that the JP can
	    // target its first instruction (the FPREM)
	    c1 = gen2(NULL, 0xD9, 0xF8);	// FPREM
	    c1 = cg87_87topsw(c1);
	    c1 = genjmp(c1, JP, FLcode, (block *)c1);	// JP FM1
	    c1 = genf2(c1,0xDD,0xD8 + 1);	// FSTP ST(1)
	    c = cat2(c,c1);

	    pop87();
	}
	else if (ty1 == TYldouble || ty1 == TYildouble)
	{
	    // 80 bit: load E1 onto the stack and use the popping
	    // register form of the operation
	    c = push87();
	    cs.Iop = op1;
	    cs.Irm |= modregrm(0, 5, 0);	// FLD tbyte ptr ...
	    c = gen(c,&cs);			// FLD   e->E1
	    genf2(c,0xDE,opld);			// FopP  ST(1)
	    pop87();
	}
	else
	{   // float/double: operate on ST0 with E1 as the memory operand.
	    // Clearing the low bit of the store opcode gives the
	    // arithmetic opcode for the same memory format.
	    cs.Iop = op1 & ~1;
	    cs.Irm |= op;
	    c = gen(CNIL,&cs);			// Fop e->E1
	}
	if (*pretregs & mPSW)
	    genftst(c,e,0);			// FTST ST0
	/* if want result in registers	*/
	if (*pretregs & (mST0 | ALLREGS | mBP))
	{
	    if (ty1 == TYldouble || ty1 == TYildouble)
	    {
		// The 80 bit store keeps its popping form (op2 == 7), so
		// duplicate ST0 and let the store pop the copy
		c = cat2(c,push87());
		c = genf2(c,0xD9,0xC0);		// FLD ST(0)
		pop87();
	    }
	    else
		op2 = 2;			// FST e->E1
	}
	else
	{					// FSTP
	    pop87();
	}
	cs.Iop = op1;
	NEWREG(cs.Irm,op2);			// FSTx e->E1
	freenode(e->E1);
	gen(c,&cs);
	genfwait(c);
	return cat4(cr,cl,c,fixresult87(e,mST0 | mPSW,pretregs));
}

/***********************************
 * Perform %= where E1 is complex and E2 is real or imaginary.
 * Each half of E1 is reduced separately against E2 with an FPREM loop
 * and the two remainders are written back to E1.
 * Params:
 *	e = the OPmodass node; e->E1 is the complex lvalue
 *	pretregs = if mST01 or mPSW is requested, the result is also left
 *		   on the 8087 stack (ST0 = remainder of the second half,
 *		   ST1 = remainder of the first half); otherwise both
 *		   remainders are popped by the stores
 * Returns:
 *	the generated code
 */

code *opmod_complex87(elem *e,regm_t *pretregs)
{
    regm_t retregs;
    code *cl,*cr,*c;
    code cs;
    tym_t ty1;
    unsigned sz2;

    /*		fld	E2
		fld	E1.re
	FM1:	fprem
		fstsw	word ptr sw
		fwait
		mov	AH, byte ptr sw+1
		jp	FM1
		fxch	ST(1)
		fld	E1.im
	FM2:	fprem
		fstsw	word ptr sw
		fwait
		mov	AH, byte ptr sw+1
		jp	FM2
		fstp	ST(1)
     */

    ty1 = tybasic(e->E1->Ety);
    sz2 = tysize[ty1] / 2;			// size of one half of E1

    retregs = mST0;
    cr = codelem(e->E2,&retregs,FALSE);		// FLD E2
    note87(e->E2,0,0);
    cl = getlvalue(&cs,e->E1,0);
    // getlvalue() may have generated code; make sure E2 is in ST0 again
    cl = cat2(cl,makesure87(e->E2,0,0,0));
    cs.Iflags |= ADDFWAIT() ? CFwait : 0;
    if (I32)
	cs.Iflags &= ~CFopsize;

    c = push87();
    switch (ty1)
    {
	case TYcdouble:	 cs.Iop = ESC(MFdouble,1);	break;
	case TYcfloat:	 cs.Iop = ESC(MFfloat,1);	break;
	case TYcldouble: cs.Iop = 0xDB; cs.Irm |= modregrm(0, 5, 0); break;
	default:
	    assert(0);
    }
    c = gen(c,&cs);				// FLD E1.re

    code *c1;

    // Each loop is built as its own list so that the JP can target
    // its first instruction (the FPREM)
    c1 = gen2(NULL, 0xD9, 0xF8);		// FPREM
    c1 = cg87_87topsw(c1);
    c1 = genjmp(c1, JP, FLcode, (block *)c1);	// JP FM1
    c1 = genf2(c1, 0xD9, 0xC8 + 1);		// FXCH ST(1)
    c = cat2(c,c1);

    c = cat2(c, push87());
    cs.IEVoffset1 += sz2;
    gen(c, &cs);				// FLD E1.im

    c1 = gen2(NULL, 0xD9, 0xF8);		// FPREM
    c1 = cg87_87topsw(c1);
    c1 = genjmp(c1, JP, FLcode, (block *)c1);	// JP FM2
    c1 = genf2(c1,0xDD,0xD8 + 1);		// FSTP ST(1)
    c = cat2(c,c1);

    pop87();					// E2 is gone

    // Now ST0 = E1.im % E2, ST1 = E1.re % E2, and cs addresses E1.im

    if (*pretregs & (mST01 | mPSW))
    {
	if (ty1 == TYcldouble)
	{
	    // There is no non-popping 80 bit store, so duplicate both
	    // halves and let the stores pop the copies.
	    c = cat2(c,push87());
	    c = cat2(c,push87());
	    c = genf2(c,0xD9,0xC0 + 1);		// FLD ST(1)
	    c = genf2(c,0xD9,0xC0 + 1);		// FLD ST(1)
	    cs.Irm |= modregrm(0, 7, 0);
	    gen(c, &cs);			// FSTP mreal.im
	    cs.IEVoffset1 -= sz2;
	    gen(c, &cs);			// FSTP mreal.re
	    pop87();
	    pop87();
	}
	else
	{
	    // FST always stores ST0, so bring the real half to the top
	    // for its store and swap back afterwards.
	    cs.Irm |= modregrm(0, 2, 0);
	    gen(c, &cs);			// FST mreal.im
	    genf2(c,0xD9,0xC8 + 1);		// FXCH ST(1)
	    cs.IEVoffset1 -= sz2;
	    gen(c, &cs);			// FST mreal.re
	    genf2(c,0xD9,0xC8 + 1);		// FXCH ST(1)
	}
	retregs = mST01;
    }
    else
    {
	// Popping stores; for the 80 bit case the reg field becomes
	// 5|3 == 7, which is the (popping) 80 bit store.
	cs.Irm |= modregrm(0, 3, 0);
	gen(c, &cs);		// FSTP mreal.im
	cs.IEVoffset1 -= sz2;
	gen(c, &cs);		// FSTP mreal.re
	pop87();
	pop87();
	retregs = 0;
    }
    freenode(e->E1);
    genfwait(c);
    return cat4(cr,cl,c,fixresult_complex87(e,retregs,pretregs));
}

/**********************************
 * Perform +=, -=, *= and /= for the lvalue being complex.
 * OPpostinc and OPpostdec are generated like += and -=.
 *
 * Multiplying or dividing by a real or imaginary operand is done
 * componentwise (with a sign change and swap for imaginary operands);
 * multiplying or dividing by a complex operand goes through the
 * CLIBcmul / CLIBcdiv library routines.
 * Params:
 *	e = the op= node; e->E1 is the complex lvalue
 *	pretregs = if mST01 or mPSW is requested, the result is also left
 *		   on the 8087 stack (ST0 = imaginary part, ST1 = real
 *		   part); otherwise both parts are popped by the stores
 * Returns:
 *	the generated code
 */

code *opass_complex87(elem *e,regm_t *pretregs)
{
    regm_t retregs;
    regm_t idxregs;
    code *cl,*cr,*c;
    code cs;
    unsigned op;
    unsigned op2;
    tym_t ty1;
    unsigned sz2;

    ty1 = tybasic(e->E1->Ety);
    sz2 = tysize[ty1] / 2;		// size of one half of E1
    // op:  reg field (already shifted) for the memory operand form
    // op2: second byte of the popping register form, before adding i
    switch (e->Eoper)
    {   case OPpostinc:
	case OPaddass:	op = 0 << 3;		// FADD
			op2 = 0xC0;		// FADDP ST(i),ST
			break;
	case OPpostdec:
	case OPminass:	op = 5 << 3;		// FSUBR
			op2 = 0xE0;		// FSUBRP ST(i),ST
			break;
	case OPmulass:	op = 1 << 3;		// FMUL
			op2 = 0xC8;		// FMULP ST(i),ST
			break;
	case OPdivass:	op = 7 << 3;		// FDIVR
			op2 = 0xF0;		// FDIVRP ST(i),ST
			break;
	default:	assert(0);
    }

    if (!tycomplex(e->E2->Ety) &&
	(e->Eoper == OPmulass || e->Eoper == OPdivass))
    {
	// Scaling by a real or imaginary operand: duplicate E2 so that
	// both halves of E1 can be combined with it at L1
	retregs = mST0;
	cr = codelem(e->E2, &retregs, FALSE);
	note87(e->E2, 0, 0);
	cl = getlvalue(&cs, e->E1, 0);
	cl = cat2(cl,makesure87(e->E2,0,0,0));
	cl = cat2(cl,push87());
	cl = genf2(cl,0xD9,0xC0);		// FLD ST(0)
	goto L1;
    }
    else
    {
	cr = loadComplex(e->E2);
	cl = getlvalue(&cs,e->E1,0);
	// getlvalue() may have generated code; make sure both halves
	// of E2 are back in ST0 and ST1
	cl = cat2(cl,makesure87(e->E2,sz2,0,0));
	cl = cat2(cl,makesure87(e->E2,0,1,0));
    }
    cs.Iflags |= ADDFWAIT() ? CFwait : 0;
    if (I32)
	cs.Iflags &= ~CFopsize;

    switch (e->Eoper)
    {
	case OPpostinc:
	case OPaddass:
	case OPpostdec:
	case OPminass:
	L1:
	    if (ty1 == TYcldouble)
	    {
		// 80 bit: load both halves of E1 and combine each with
		// the operand two slots below it
		c = push87();
		c = cat2(c, push87());
		cs.Iop = 0xDB;
		cs.Irm |= modregrm(0, 5, 0);	// FLD tbyte ptr ...
		c = gen(c,&cs);			// FLD e->E1.re
		cs.IEVoffset1 += sz2;
		gen(c,&cs);			// FLD e->E1.im
		genf2(c, 0xDE, op2 + 2);	// FADDP/FSUBRP ST(2),ST
		genf2(c, 0xDE, op2 + 2);	// FADDP/FSUBRP ST(2),ST
		pop87();
		pop87();
		if (tyimaginary(e->E2->Ety))
		{
		    if (e->Eoper == OPmulass)
		    {
			genf2(c, 0xD9, 0xE0);	// FCHS
			genf2(c, 0xD9, 0xC8+1);	// FXCH ST(1)
		    }
		    else if (e->Eoper == OPdivass)
		    {
			genf2(c, 0xD9, 0xC8+1);	// FXCH ST(1)
			genf2(c, 0xD9, 0xE0);	// FCHS
		    }
		}
	    L2:
		// 80 bit store back. There is no non-popping 80 bit
		// store, so duplicate both halves first when the result
		// must stay on the stack.
		if (*pretregs & (mST01 | mPSW))
		{
		    c = cat2(c,push87());
		    c = cat2(c,push87());
		    c = genf2(c,0xD9,0xC1);	// FLD ST(1)
		    c = genf2(c,0xD9,0xC1);	// FLD ST(1)
		    retregs = mST01;
		}
		else
		    retregs = 0;
		cs.Iop = 0xDB;
		cs.Irm |= modregrm(0,7,0);
		gen(c,&cs);			// FSTP e->E1.im
		cs.IEVoffset1 -= sz2;
		gen(c,&cs);			// FSTP e->E1.re
		pop87();
		pop87();

	    }
	    else
	    {	// float/double: operate with the halves of E1 as memory
		// operands. Precompute the ModRM bytes and opcodes needed.
		unsigned char rmop = cs.Irm | op;
		unsigned char rmfst = cs.Irm | modregrm(0,2,0);
		unsigned char rmfstp = cs.Irm | modregrm(0,3,0);
		unsigned char iopfst = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
		unsigned char iop = (ty1 == TYcfloat) ? 0xD8 : 0xDC;

		cs.Iop = iop;
		cs.Irm = rmop;
		cs.IEVoffset1 += sz2;
		c = gen(NULL, &cs);		// FSUBR mreal.im
		if (tyimaginary(e->E2->Ety) && (e->Eoper == OPmulass || e->Eoper == OPdivass))
		{
		    if (e->Eoper == OPmulass)
			genf2(c, 0xD9, 0xE0);		// FCHS
		    genf2(c,0xD9,0xC8 + 1);		// FXCH ST(1)
		    cs.IEVoffset1 -= sz2;
		    gen(c, &cs);			// FMUL mreal.re
		    if (e->Eoper == OPdivass)
			genf2(c, 0xD9, 0xE0);		// FCHS
		    if (*pretregs & (mST01 | mPSW))
		    {
			cs.Iop = iopfst;
			cs.Irm = rmfst;
			cs.IEVoffset1 += sz2;
			gen(c, &cs);			// FST mreal.im
			genf2(c,0xD9,0xC8 + 1);		// FXCH ST(1)
			cs.IEVoffset1 -= sz2;
			gen(c, &cs);			// FST mreal.re
			genf2(c,0xD9,0xC8 + 1);		// FXCH ST(1)
			retregs = mST01;
		    }
		    else
		    {
			cs.Iop = iopfst;
			cs.Irm = rmfstp;
			cs.IEVoffset1 += sz2;
			gen(c, &cs);			// FSTP mreal.im
			pop87();
			cs.IEVoffset1 -= sz2;
			gen(c, &cs);			// FSTP mreal.re
			pop87();
			retregs = 0;
		    }
		    goto L3;
		}

		if (*pretregs & (mST01 | mPSW))
		{
		    cs.Iop = iopfst;
		    cs.Irm = rmfst;
		    gen(c, &cs);		// FST mreal.im
		    genf2(c,0xD9,0xC8 + 1);	// FXCH ST(1)
		    cs.Iop = iop;
		    cs.Irm = rmop;
		    cs.IEVoffset1 -= sz2;
		    gen(c, &cs);		// FSUBR mreal.re
		    cs.Iop = iopfst;
		    cs.Irm = rmfst;
		    gen(c, &cs);		// FST mreal.re
		    genf2(c,0xD9,0xC8 + 1);	// FXCH ST(1)
		    retregs = mST01;
		}
		else
		{
		    cs.Iop = iopfst;
		    cs.Irm = rmfstp;
		    gen(c, &cs);		// FSTP mreal.im
		    pop87();
		    cs.Iop = iop;
		    cs.Irm = rmop;
		    cs.IEVoffset1 -= sz2;
		    gen(c, &cs);		// FSUBR mreal.re
		    cs.Iop = iopfst;
		    cs.Irm = rmfstp;
		    gen(c, &cs);		// FSTP mreal.re
		    pop87();
		    retregs = 0;
		}
	    }
	L3:
	    freenode(e->E1);
	    genfwait(c);
	    return cat4(cr,cl,c,fixresult_complex87(e,retregs,pretregs));

	case OPmulass:
	    // complex *= complex: load E1 on top of E2 and call CLIBcmul
	    c = push87();
	    c = cat2(c, push87());
	    if (ty1 == TYcldouble)
	    {
		cs.Iop = 0xDB;
		cs.Irm |= modregrm(0, 5, 0);	// FLD tbyte ptr ...
		c = gen(c,&cs);			// FLD e->E1.re
		cs.IEVoffset1 += sz2;
		gen(c,&cs);			// FLD e->E1.im
		retregs = mST01;
		c = cat2(c,callclib(e, CLIBcmul, &retregs, 0));
		goto L2;
	    }
	    else
	    {
		cs.Iop = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
		cs.Irm |= modregrm(0, 0, 0);	// FLD float/double ptr ...
		c = gen(c,&cs);			// FLD e->E1.re
		cs.IEVoffset1 += sz2;
		gen(c,&cs);			// FLD e->E1.im
		retregs = mST01;
		c = cat2(c,callclib(e, CLIBcmul, &retregs, 0));
		if (*pretregs & (mST01 | mPSW))
		{
		    // FST always stores ST0, so bring the real part to
		    // the top for its store and swap back afterwards.
		    cs.Irm |= modregrm(0, 2, 0);
		    gen(c, &cs);		// FST mreal.im
		    genf2(c,0xD9,0xC8 + 1);	// FXCH ST(1)
		    cs.IEVoffset1 -= sz2;
		    gen(c, &cs);		// FST mreal.re
		    genf2(c,0xD9,0xC8 + 1);	// FXCH ST(1)
		    retregs = mST01;
		}
		else
		{
		    cs.Irm |= modregrm(0, 3, 0);
		    gen(c, &cs);		// FSTP mreal.im
		    cs.IEVoffset1 -= sz2;
		    gen(c, &cs);		// FSTP mreal.re
		    pop87();
		    pop87();
		    retregs = 0;
		}
		goto L3;
	    }

	case OPdivass:
	    // complex /= complex: load E1 underneath E2 (each FLD is
	    // followed by an exchange with ST(2)) and call CLIBcdiv
	    c = push87();
	    c = cat2(c, push87());
	    idxregs = idxregm(cs.Irm,cs.Isib);	// mask of index regs used
	    if (ty1 == TYcldouble)
	    {
		cs.Iop = 0xDB;
		cs.Irm |= modregrm(0, 5, 0);	// FLD tbyte ptr ...
		c = gen(c,&cs);			// FLD e->E1.re
		genf2(c,0xD9,0xC8 + 2);		// FXCH ST(2)
		cs.IEVoffset1 += sz2;
		gen(c,&cs);			// FLD e->E1.im
		genf2(c,0xD9,0xC8 + 2);		// FXCH ST(2)
		retregs = mST01;
		c = cat2(c,callclib(e, CLIBcdiv, &retregs, idxregs));
		goto L2;
	    }
	    else
	    {
		cs.Iop = (ty1 == TYcfloat) ? 0xD9 : 0xDD;
		cs.Irm |= modregrm(0, 0, 0);	// FLD float/double ptr ...
		c = gen(c,&cs);			// FLD e->E1.re
		genf2(c,0xD9,0xC8 + 2);		// FXCH ST(2)
		cs.IEVoffset1 += sz2;
		gen(c,&cs);			// FLD e->E1.im
		genf2(c,0xD9,0xC8 + 2);		// FXCH ST(2)
		retregs = mST01;
		c = cat2(c,callclib(e, CLIBcdiv, &retregs, idxregs));
		if (*pretregs & (mST01 | mPSW))
		{
		    // FST always stores ST0, so bring the real part to
		    // the top for its store and swap back afterwards.
		    cs.Irm |= modregrm(0, 2, 0);
		    gen(c, &cs);		// FST mreal.im
		    genf2(c,0xD9,0xC8 + 1);	// FXCH ST(1)
		    cs.IEVoffset1 -= sz2;
		    gen(c, &cs);		// FST mreal.re
		    genf2(c,0xD9,0xC8 + 1);	// FXCH ST(1)
		    retregs = mST01;
		}
		else
		{
		    cs.Irm |= modregrm(0, 3, 0);
		    gen(c, &cs);		// FSTP mreal.im
		    cs.IEVoffset1 -= sz2;
		    gen(c, &cs);		// FSTP mreal.re
		    pop87();
		    pop87();
		    retregs = 0;
		}
		goto L3;
	    }

	default:
	    assert(0);
    }
    return NULL;
}

/**************************
 * OPnegass
 * Negate a floating point lvalue in place by flipping the sign bit of
 * its most significant byte with an XOR directly in memory; for complex
 * lvalues the sign bits of both halves are flipped.
 * If a result is wanted, the negated value is then loaded into ST0
 * (only real and imaginary types are supported for that).
 * Params:
 *	e = the OPnegass node; e->E1 is the lvalue
 *	pretregs = desired result registers, 0 if none
 * Returns:
 *	the generated code
 */

code *cdnegass87(elem *e,regm_t *pretregs)
{   regm_t retregs;
    tym_t tyml;
    unsigned op;
    code *cl,*cr,*c,cs;
    elem *e1;
    int sz;
    int signoff;			// offset from EA of the byte holding the sign bit

    //printf("cdnegass87(e = %p, *pretregs = x%x)\n", e, *pretregs);
    e1 = e->E1;
    tyml = tybasic(e1->Ety);		// type of lvalue
    sz = tysize[tyml];

    cl = getlvalue(&cs,e1,0);
    cr = modEA(cs.Irm);
    cs.Irm |= modregrm(0,6,0);
    cs.Iop = 0x80;

    // The sign bit is in the last byte of the value, except for padded
    // 80 bit reals, where it is in byte 9 regardless of the padding.
    signoff = sz - 1;
#if LNGDBLSIZE > 10
    if (tyml == TYldouble || tyml == TYildouble)
	signoff = 10 - 1;
    else if (tyml == TYcldouble)
	signoff = LNGDBLSIZE + 10 - 1;
#endif
    cs.IEVoffset1 += signoff;
    cs.IFL2 = FLconst;
    cs.IEV2.Vuns = 0x80;
    c = gen(NULL,&cs);			// XOR 7[EA],0x80
    if (tycomplex(tyml))
    {
	// Step back one half to reach the sign byte of the first half
	cs.IEVoffset1 -= sz / 2;
	gen(c,&cs);			// XOR 7[EA],0x80
    }
    c = cat3(cl,cr,c);

    if (*pretregs)
    {
	switch (tyml)
	{
	    case TYifloat:
	    case TYfloat:		cs.Iop = 0xD9;	op = 0;	break;
	    case TYidouble:
	    case TYdouble:
	    case TYdouble_alias:	cs.Iop = 0xDD;	op = 0;	break;
	    case TYildouble:
	    case TYldouble:		cs.Iop = 0xDB;	op = 5;	break;
	    default:
		assert(0);
	}
	NEWREG(cs.Irm,op);
	// Undo exactly the adjustment applied above so that the load
	// addresses the start of the value again. (Subtracting sz - 1
	// here would be wrong for padded 80 bit reals, where only 9 was
	// added.)
	cs.IEVoffset1 -= signoff;
	c = cat2(c, push87());
	c = gen(c,&cs);			// FLD EA
	retregs = mST0;
    }
    else
	retregs = 0;

    freenode(e1);
    return cat2(c,fixresult87(e,retregs,pretregs));
}

/************************
 * Take care of OPpostinc and OPpostdec.
 *
 * Loads the lvalue e->E1 onto the 8087 stack, adds (OPpostinc) or
 * subtracts (OPpostdec) the rvalue e->E2, and stores the new value back
 * into e->E1. The value loaded before the update is what gets handed to
 * fixresult87() / fixresult_complex87() as the result of the expression.
 * Input:
 *	e		the OPpostinc/OPpostdec elem
 *	pretregs	where the result is wanted; must be non-zero
 * Returns:
 *	the generated code
 */

code *post87(elem *e,regm_t *pretregs)
{
	regm_t retregs;
	code *cl,*cr,*c;
	code cs;
	unsigned op;
	unsigned op1;
	unsigned reg;
	tym_t ty1;

	//printf("post87()\n");
	assert(*pretregs);
	cl = getlvalue(&cs,e->E1,0);
	cs.Iflags |= ADDFWAIT() ? CFwait : 0;
	if (I32)
	    cs.Iflags &= ~CFopsize;
	ty1 = tybasic(e->E1->Ety);
	// Pick the FLD opcode (op1) and its reg field (reg) for the operand size
	switch (ty1)
	{   case TYdouble_alias:
	    case TYidouble:
	    case TYdouble:
	    case TYcdouble:	op1 = ESC(MFdouble,1);	reg = 0;	break;
	    case TYifloat:
	    case TYfloat:
	    case TYcfloat:	op1 = ESC(MFfloat,1);	reg = 0;	break;
	    case TYildouble:
	    case TYldouble:
	    case TYcldouble:	op1 = 0xDB;		reg = 5;	break;
	    default:
		assert(0);
	}
	NEWREG(cs.Irm, reg);
	// From here on reg is the reg field used for the store back (FSTP)
	// with the same opcode: 7 for the 0xDB /5 load, 3 for the others.
	if (reg == 5)
	    reg = 7;
	else
	    reg = 3;
	cs.Iop = op1;
	cl = cat2(cl,push87());
	cl = gen(cl,&cs);		// FLD e->E1
	if (tycomplex(ty1))
	{   unsigned sz = tysize[ty1] / 2;	// size of one half of the complex

	    // Load the second half, sz bytes further on
	    cl = cat2(cl,push87());
	    cs.IEVoffset1 += sz;
	    cl = gen(cl,&cs);		// FLD e->E1
	    retregs = mST0;		// note kludge to only load real part
	    cr = codelem(e->E2,&retregs,FALSE);	// load rvalue
	    // The rvalue is now ST0 and the half loaded first is ST2
	    c = genf2(NULL,0xD8,	// FADD/FSUBR ST,ST2
		(e->Eoper == OPpostinc) ? 0xC0 + 2 : 0xE8 + 2);
	    NEWREG(cs.Irm,reg);
	    pop87();			// account for the FSTP below
	    cs.IEVoffset1 -= sz;	// back to the half that was loaded first
	    gen(c,&cs);			// FSTP e->E1
	    genfwait(c);
	    freenode(e->E1);
	    // The two halves loaded above are still on the stack as the result
	    return cat4(cl, cr, c, fixresult_complex87(e, mST01, pretregs));
	}

	if (*pretregs & (mST0 | ALLREGS | mBP))
	{   // Want the result in a register
	    // Duplicate ST0 so a copy of the old value survives the
	    // FADDP/FSUBRP + FSTP sequence below
	    cl = cat2(cl,push87());
	    genf2(cl,0xD9,0xC0);	// FLD ST0
	}
	if (*pretregs & mPSW)		/* if result in flags		*/
	    genftst(cl,e,0);		// FTST ST0
	retregs = mST0;
	cr = codelem(e->E2,&retregs,FALSE);	/* load rvalue		*/
	pop87();			// account for the pop done by FADDP/FSUBRP
	op = (e->Eoper == OPpostinc) ? modregrm(3,0,1) : modregrm(3,5,1);
	c = genf2(NULL,0xDE,op);	// FADDP/FSUBRP ST1
	NEWREG(cs.Irm,reg);
	pop87();			// account for the FSTP below
	gen(c,&cs);			/* FSTP e->E1			*/
	genfwait(c);
	freenode(e->E1);
	return cat4(cl,cr,c,fixresult87(e,mPSW | mST0,pretregs));
}

/************************
 * Do the following opcodes:
 *	OPd_s16
 *	OPd_s32
 *	OPd_u16
 *	OPd_s64
 *
 * Converts the floating point operand e->E1 to an integer.
 * When !I32, the conversion is done by a call to a runtime library routine.
 * Otherwise inline code is generated that saves the 8087 control word on
 * the CPU stack, loads the round-to-0 control word, stores the integer to
 * the CPU stack with FISTP, restores the saved control word and pops the
 * integer into the result register(s).
 * Input:
 *	e		the conversion elem
 *	pretregs	where the result is wanted; must be non-zero
 * Returns:
 *	the generated code
 */

code *cnvt87(elem *e,regm_t *pretregs)
{
	regm_t retregs;
	code *c1,*c2;
	unsigned mf,rf,reg;
	tym_t tym;
	int clib;
	int sz;
	int szoff;

	//printf("cnvt87(e = %p, *pretregs = x%x)\n", e, *pretregs);
	assert(*pretregs);
	tym = e->Ety;
	sz = tysize(tym);
	szoff = sz;			// number of bytes the FISTP stores

	// clib = library routine for the !I32 path
	// mf,rf = opcode and reg field of the FISTP for the inline path
	switch (e->Eoper)
	{   case OPd_s16:
		clib = CLIBdblint87;
		mf = ESC(MFword,1);
		rf = 3;
		break;

	    case OPd_u16:
		// Converted with the same 32 bit FISTP as OPd_s32, so 4 bytes
		// get stored regardless of the size of the result type
		szoff = 4;
		// fall through
	    case OPd_s32:
		clib = CLIBdbllng87;
		mf = ESC(MFlong,1);
		rf = 3;
		break;

	    case OPd_s64:
		clib = CLIBdblllng;
		mf = 0xDF;
		rf = 7;
		break;

	    default:
		assert(0);
	}

	if (!I32)			// C may change the default control word
	{
	    if (clib == CLIBdblllng)
	    {	// NOTE(review): I32 is always false inside this branch, so this
		// always selects DOUBLEREGS_16
		retregs = I32 ? DOUBLEREGS_32 : DOUBLEREGS_16;
		c1 = codelem(e->E1,&retregs,FALSE);
		c2 = callclib(e,clib,pretregs,0);
	    }
	    else
	    {	retregs = mST0; //I32 ? DOUBLEREGS_32 : DOUBLEREGS_16;
		c1 = codelem(e->E1,&retregs,FALSE);
		c2 = callclib(e,clib,pretregs,0);
		pop87();
	    }
	}
	else if (1)
	{   //	Generate:
	    //	sub	ESP,12
	    //	fstcw	8[ESP]
	    //	fldcw	roundto0
	    //	fistp	long64 ptr [ESP]
	    //	fldcw	8[ESP]
	    //	pop	lsw
	    //	pop	msw
	    //	add	ESP,4

	    // Stack space needed: szoff bytes for the integer, 2 for the saved
	    // control word, 2 more for the round-to-0 control word when
	    // generating PIC code; rounded up to a multiple of REGSIZE.
	    unsigned szpush = szoff + 2;
	    if (config.flags3 & CFG3pic)
		szpush += 2;
	    szpush = (szpush + REGSIZE - 1) & ~(REGSIZE - 1);

	    retregs = mST0;
	    c1 = codelem(e->E1,&retregs,FALSE);

	    // A single PUSH reserves REGSIZE bytes; otherwise adjust ESP
	    if (szpush == REGSIZE)
		c1 = gen1(c1,0x50 + AX);		// PUSH EAX
	    else
		c1 = genc2(c1,0x81,modregrm(3,5,SP), szpush);	// SUB ESP,12
	    c1 = genfwait(c1);
	    genc1(c1,0xD9,modregrm(2,7,4) + 256*modregrm(4,4,SP),FLconst,szoff); // FSTCW szoff[ESP]

	    c1 = genfwait(c1);

	    if (config.flags3 & CFG3pic)
	    {
		// For PIC the round-to-0 control word is written to the stack
		// and loaded from there, instead of calling genrnd()
		genc(c1,0xC7,modregrm(2,0,4) + 256*modregrm(4,4,SP),FLconst,szoff+2,FLconst,CW_roundto0); // MOV szoff+2[ESP], CW_roundto0
		code_orflag(c1, CFopsize);
		genc1(c1,0xD9,modregrm(2,5,4) + 256*modregrm(4,4,SP),FLconst,szoff+2); // FLDCW szoff+2[ESP]
	    }
	    else
		c1 = genrnd(c1, CW_roundto0);	// FLDCW roundto0

	    pop87();				// account for the FISTP below

	    c1 = genfwait(c1);
	    gen2sib(c1,mf,modregrm(0,rf,4),modregrm(4,4,SP));			// FISTP [ESP]

	    // Pick result registers; any general register will do if the
	    // caller did not ask for a specific one
	    retregs = *pretregs & (ALLREGS | mBP);
	    if (!retregs)
		    retregs = ALLREGS;
	    c2 = allocreg(&retregs,&reg,tym);

	    c2 = genfwait(c2);								// FWAIT
	    c2 = genc1(c2,0xD9,modregrm(2,5,4) + 256*modregrm(4,4,SP),FLconst,szoff);	// FLDCW szoff[ESP]

	    // szpush now tracks how many reserved bytes are still on the stack
	    if (szoff > REGSIZE)
	    {	szpush -= REGSIZE;
		c2 = gen1(c2,0x58 + findreglsw(retregs));	// POP lsw
	    }
	    szpush -= REGSIZE;
	    c2 = gen1(c2,0x58 + reg);				// POP reg

	    if (szpush)
		genc2(c2,0x81,modregrm(3,0,SP), szpush);	// ADD ESP,4
	    c2 = cat2(c2,fixresult(e,retregs,pretregs));
	}
	else
	{
	    // NOTE(review): unreachable because of the 'else if (1)' above;
	    // it goes through floatreg instead of the CPU stack.

	    // This is incorrect. For -inf and nan, the 8087 returns the largest
	    // negative int (0x80000....). For -inf, 0x7FFFF... should be returned,
	    // and for nan, 0 should be returned.
	    retregs = mST0;
	    c1 = codelem(e->E1,&retregs,FALSE);

	    c1 = genfwait(c1);
	    c1 = genrnd(c1, CW_roundto0);	// FLDCW roundto0

	    pop87();
	    c1 = genfltreg(c1,mf,rf,0);		// FISTP floatreg
	    retregs = *pretregs & (ALLREGS | mBP);
	    if (!retregs)
		    retregs = ALLREGS;
	    c2 = allocreg(&retregs,&reg,tym);

	    c2 = genfwait(c2);

	    if (sz > REGSIZE)
	    {	c2 = genfltreg(c2,0x8B,reg,REGSIZE);	// MOV reg,floatreg + REGSIZE
							// MOV lsreg,floatreg
		genfltreg(c2,0x8B,findreglsw(retregs),0);
	    }
	    else
		c2 = genfltreg(c2,0x8B,reg,0);	// MOV reg,floatreg
	    c2 = genrnd(c2, CW_roundtonearest);	// FLDCW roundtonearest
	    c2 = cat2(c2,fixresult(e,retregs,pretregs));
	}
	return cat2(c1,c2);
}

/************************
 * Do OPrndtol.
 *
 * Loads e->E1 onto the 8087 stack, stores it as an integer of the result
 * type's size into floatreg with FISTP (no control word change is made
 * here), then moves floatreg into general register(s).
 * If no result is wanted, only the operand is evaluated.
 * Input:
 *	e		the OPrndtol elem
 *	pretregs	where the result is wanted
 * Returns:
 *	the generated code
 */

code *cdrndtol(elem *e,regm_t *pretregs)
{
	if (*pretregs == 0)
	    return codelem(e->E1,pretregs,FALSE);

	tym_t tym = e->Ety;
	regm_t retregs = mST0;
	code *cstore = codelem(e->E1,&retregs,FALSE);

	// Opcode and reg field of the FISTP matching the integer size
	unsigned char fistop;
	unsigned char fistreg;
	unsigned sz = tysize(tym);
	if (sz == 2)
	{   fistop = 0xDF;
	    fistreg = 3;
	}
	else if (sz == 4)
	{   fistop = 0xDB;
	    fistreg = 3;
	}
	else if (sz == 8)
	{   fistop = 0xDF;
	    fistreg = 7;
	}
	else
	    assert(0);

	pop87();				// the FISTP pops the 8087 stack
	cstore = genfltreg(cstore,fistop,fistreg,0);	// FISTP floatreg

	// Any general register will do if none was asked for
	retregs = *pretregs & (ALLREGS | mBP);
	if (!retregs)
	    retregs = ALLREGS;

	unsigned reg;
	code *cmove = allocreg(&retregs,&reg,tym);
	cmove = genfwait(cmove);		// FWAIT
	if (tysize(tym) > REGSIZE)
	{
	    // Result needs a register pair: high part first, then the low part
	    cmove = genfltreg(cmove,0x8B,reg,REGSIZE);	// MOV reg,floatreg + REGSIZE
	    genfltreg(cmove,0x8B,findreglsw(retregs),0);	// MOV lsreg,floatreg
	}
	else
	    cmove = genfltreg(cmove,0x8B,reg,0);	// MOV reg,floatreg
	cmove = cat2(cmove,fixresult(e,retregs,pretregs));

	return cat2(cstore,cmove);
}

/*************************
 * Do OPscale, OPyl2x, OPyl2xp1.
 *
 * Evaluates e->E1 and then e->E2 onto the 8087 stack, makes sure e->E1 is
 * in ST(1), and emits the instruction for the operator. One stack entry
 * is consumed; the result is left in ST0.
 * Input:
 *	e		the binary elem
 *	pretregs	where the result is wanted; must be non-zero
 * Returns:
 *	the generated code
 */

code *cdscale(elem *e,regm_t *pretregs)
{
    assert(*pretregs != 0);

    regm_t retregs = mST0;
    code *cleft = codelem(e->E1,&retregs,FALSE);
    note87(e->E1,0,0);			// remember that e->E1 is in ST0

    code *cright = codelem(e->E2,&retregs,FALSE);
    // now have x,y on stack; need y,x
    cright = cat2(cright,makesure87(e->E1,0,1,0));

    if (e->Eoper == OPscale)
    {
	cright = genf2(cright,0xD9,0xFD);	// FSCALE
	genf2(cright,0xDD,0xD8 + 1);		// FSTP ST(1)
    }
    else if (e->Eoper == OPyl2x)
	cright = genf2(cright,0xD9,0xF1);	// FYL2X
    else if (e->Eoper == OPyl2xp1)
	cright = genf2(cright,0xD9,0xF9);	// FYL2XP1

    pop87();				// two operands in, one result out
    return cat3(cleft,cright,fixresult87(e,mST0,pretregs));
}


/**********************************
 * Unary -, absolute value, square root, sine, cosine, round to integer
 *
 * Evaluates e->E1 into ST0 and applies the single 0xD9 xx instruction
 * that corresponds to e->Eoper.
 * Input:
 *	e		the unary elem
 *	pretregs	where the result is wanted; must be non-zero
 * Returns:
 *	the generated code
 */

code *neg87(elem *e,regm_t *pretregs)
{
	assert(*pretregs);

	int fop;			// second opcode byte, follows 0xD9
	switch (e->Eoper)
	{
	    case OPneg:
		fop = 0xE0;		// FCHS
		break;
	    case OPabs:
		fop = 0xE1;		// FABS
		break;
	    case OPsqrt:
		fop = 0xFA;		// FSQRT
		break;
	    case OPsin:
		fop = 0xFE;		// FSIN
		break;
	    case OPcos:
		fop = 0xFF;		// FCOS
		break;
	    case OPrint:
		fop = 0xFC;		// FRNDINT
		break;
	    default:
		assert(0);
	}

	regm_t retregs = mST0;
	code *c = codelem(e->E1,&retregs,FALSE);
	c = genf2(c,0xD9,fop);
	return cat2(c,fixresult87(e,mST0,pretregs));
}

/**********************************
 * Unary - for complex operands
 *
 * Evaluates e->E1 into ST0/ST1, then changes the sign of both entries:
 * negate ST0, exchange with ST(1), negate again, exchange back.
 * Input:
 *	e		the OPneg elem
 *	pretregs	where the result is wanted
 * Returns:
 *	the generated code
 */

code *neg_complex87(elem *e,regm_t *pretregs)
{
    assert(e->Eoper == OPneg);

    regm_t retregs = mST01;
    code *c = codelem(e->E1,&retregs,FALSE);

    // Two passes: each negates the current ST0 and then swaps ST0 with ST1,
    // so after the second pass both are negated and back in place.
    for (int half = 0; half < 2; half++)
    {
	c = genf2(c,0xD9,0xE0);		// FCHS
	genf2(c,0xD9,0xC8 + 1);		// FXCH ST(1)
    }

    return cat2(c,fixresult_complex87(e,mST01,pretregs));
}

/*********************************
 * Load a floating point value through the addressing mode of e.
 *
 * The addressing mode code is always generated. If a result is wanted,
 * an FLD of the right operand size pushes the value onto the 8087 stack.
 * Only real and imaginary types are handled; anything else asserts.
 * Input:
 *	e		elem to get the addressing mode from
 *	pretregs	where the result is wanted, 0 if it is not needed
 * Returns:
 *	the generated code
 */

code *cdind87(elem *e,regm_t *pretregs)
{
    //printf("cdind87(e = %p, *pretregs = x%x)\n",e,*pretregs);

    code cs;
    code *c = getlvalue(&cs,e,0);	// get addressing mode

    if (!*pretregs)
	return c;			// nothing to load

    switch (tybasic(e->Ety))
    {
	case TYfloat:
	case TYifloat:
	    cs.Iop = 0xD9;
	    break;

	case TYidouble:
	case TYdouble:
	case TYdouble_alias:
	    cs.Iop = 0xDD;
	    break;

	case TYildouble:
	case TYldouble:
	    cs.Iop = 0xDB;
	    cs.Irm |= modregrm(0,5,0);
	    break;

	default:
	    assert(0);
    }

    c = cat2(c,push87());
    c = gen(c,&cs);			// FLD EA
    return cat2(c,fixresult87(e,mST0,pretregs));
}

/************************************
 * Reset statics for another .obj file.
 *
 * Zeroes oldd, which makes genrnd() create its control word symbols
 * afresh the next time it needs them.
 */

void cg87_reset()
{
    memset(&oldd, 0, sizeof oldd);
}


/*****************************************
 * Generate code to load the 8087 control word with cw (FLDCW).
 *
 * For PIC code the control word is written to floatreg and loaded from
 * there. Otherwise it is loaded from a read-only symbol; the symbols for
 * CW_roundto0 and CW_roundtonearest are created on first use and cached
 * in oldd.
 * Input:
 *	c	code to append to
 *	cw	control word; in the non-PIC case anything other than
 *		CW_roundto0 selects the round-to-nearest symbol
 * Returns:
 *	c with the new code appended
 */

STATIC code *genrnd(code *c, short cw)
{
    if (config.flags3 & CFG3pic)
    {
	code *cpic = genfltreg(NULL, 0xC7, 0, 0);	// MOV floatreg, cw
	cpic->IFL2 = FLconst;
	cpic->IEV2.Vuns = cw;

	cpic = genfltreg(cpic, 0xD9, 5, 0);		// FLDCW floatreg
	return cat2(c, cpic);
    }

    if (!oldd.round)			// if not initialized
    {
	short cwto0 = CW_roundto0;		// round to 0
	short cwnearest = CW_roundtonearest;	// round to nearest

	oldd.round = 1;
	oldd.roundto0 = out_readonly_sym(TYshort,&cwto0,2);
	oldd.roundtonearest = out_readonly_sym(TYshort,&cwnearest,2);
    }

    symbol *rnddir;
    if (cw == CW_roundto0)
	rnddir = oldd.roundto0;
    else
	rnddir = oldd.roundtonearest;

    // FLDCW rnddir
    code cs;
    cs.Iop = 0xD9;
    cs.Iflags = CFoff;
    cs.Ijty = 0;
    cs.IEVsym1 = rnddir;
    cs.IFL1 = rnddir->Sfl;
    cs.IEVoffset1 = 0;
    cs.Irm = modregrm(0,5,BPRM);
    return gen(c,&cs);
}

/************************* Complex Numbers *********************/

/***************************
 * Set the PSW based on the state of ST01.
 * Both ST0 and ST1 are compared against 0. If the first compare is
 * 'not equal' or unordered, its flags are the result; otherwise the flags
 * of the second compare are.
 * Input:
 *	c	code to append to
 *	e	not referenced by the function body
 *	pop	if stack should be popped after test
 * Returns:
 *	start of code appended to c.
 */

STATIC code * genctst(code *c,elem *e,int pop)
#if __DMC__
__in
{
    assert(pop == 0 || pop == 1);
}
__body
#endif
{
    // Generate:
    //	if (pop)
    //		FLDZ
    //		FUCOMPP
    //		FSTSW	AX
    //		SAHF
    //		FLDZ
    //		FUCOMPP
    //		JNE	L1
    //		JP	L1		// if NAN
    //		FSTSW	AX
    //		SAHF
    //	    L1:
    //	else
    //		FLDZ
    //		FUCOM
    //		FSTSW	AX
    //		SAHF
    //		FUCOMP	ST(2)
    //		JNE	L1
    //		JP	L1		// if NAN
    //		FSTSW	AX
    //		SAHF
    //	    L1:
    // FUCOMP doesn't raise exceptions on QNANs, unlike FTST

    code *cnop;

    cnop = gennop(CNIL);		// serves as the label L1
    c = cat2(c,push87());		// make room for the FLDZ
    c = gen2(c,0xD9,0xEE);		// FLDZ
    if (pop)
    {
	gen2(c,0xDA,0xE9);		// FUCOMPP
	pop87();			// the 0 and the old ST0 are gone
	pop87();
	cg87_87topsw(c);		// put 8087 flags in CPU flags
	gen2(c,0xD9,0xEE);		// FLDZ
	gen2(c,0xDA,0xE9);		// FUCOMPP
	pop87();			// the old ST1 is gone as well
	// The jumps test the CPU flags of the first compare; the 8087 flags of
	// the second compare have not been transferred yet.
	genjmp(c,JNE,FLcode,(block *) cnop); // JNE	L1
	genjmp(c,JP, FLcode,(block *) cnop); // JP	L1
	cg87_87topsw(c);		// put 8087 flags in CPU flags
    }
    else
    {
	gen2(c,0xDD,0xE1);		// FUCOM
	cg87_87topsw(c);		// put 8087 flags in CPU flags
	gen2(c,0xDD,0xEA);		// FUCOMP ST(2)
	pop87();			// only the 0 is popped, ST01 stays
	// As above, the jumps test the flags of the first compare
	genjmp(c,JNE,FLcode,(block *) cnop); // JNE	L1
	genjmp(c,JP, FLcode,(block *) cnop); // JP	L1
	cg87_87topsw(c);		// put 8087 flags in CPU flags
    }
    return cat2(c, cnop);
}

/******************************
 * Given the result of a complex expression is in retregs,
 * generate necessary code to return result in *pretregs.
 *
 * Cases handled:
 *	- result not wanted and it is in ST01: pop both entries
 *	- TYcfloat in ST01, wanted in EDX/EAX: store each half to floatreg
 *	  and move it into a register (ST0 goes to EDX, then ST1 to EAX)
 *	- TYcfloat in EDX/EAX, wanted in ST01: load EAX and then EDX onto
 *	  the 8087 stack via floatreg
 *	- otherwise only the flags may need setting
 * Input:
 *	e		elem the result belongs to
 *	retregs		where the result is now
 *	pretregs	where the result is wanted
 * Returns:
 *	the generated code
 */


code *fixresult_complex87(elem *e,regm_t retregs,regm_t *pretregs)
{
#if 0
    printf("fixresult_complex87(e = %p, retregs = %s, *pretregs = %s)\n",
  	e,regm_str(retregs),regm_str(*pretregs));
#endif
    assert(!*pretregs || retregs);

    code *cfirst = CNIL;
    code *csecond = CNIL;
    tym_t tym = tybasic(e->Ety);
    unsigned sz = tysize[tym];

    if (*pretregs == 0 && retregs == mST01)
    {
	// Discard both halves
	for (int half = 0; half < 2; half++)
	{
	    cfirst = genf2(cfirst,0xDD,modregrm(3,3,0));	// FPOP
	    pop87();
	}
    }
    else if (tym == TYcfloat && (*pretregs & (mAX|mDX)) && (retregs & mST01))
    {
	// 8087 stack -> EDX,EAX
	if ((*pretregs & mPSW) && !(retregs & mPSW))
	    cfirst = genctst(cfirst,e,0);		// FTST

	pop87();
	cfirst = genfltreg(cfirst, ESC(MFfloat,1),3,0);	// FSTP floatreg
	genfwait(cfirst);
	csecond = getregs(mDX|mAX);
	csecond = genfltreg(csecond, 0x8B, DX, 0);	// MOV EDX,floatreg

	pop87();
	csecond = genfltreg(csecond, ESC(MFfloat,1),3,0); // FSTP floatreg
	genfwait(csecond);
	csecond = genfltreg(csecond, 0x8B, AX, 0);	// MOV EAX,floatreg
    }
    else if (tym == TYcfloat && (retregs & (mAX|mDX)) && (*pretregs & mST01))
    {
	// EAX,EDX -> 8087 stack
	cfirst = push87();
	cfirst = genfltreg(cfirst, 0x89, AX, 0);	// MOV floatreg, EAX
	genfltreg(cfirst, 0xD9, 0, 0);			// FLD float ptr floatreg

	csecond = push87();
	csecond = genfltreg(csecond, 0x89, DX, 0);	// MOV floatreg, EDX
	genfltreg(csecond, 0xD9, 0, 0);			// FLD float ptr floatreg

	if (*pretregs & mPSW)
	    csecond = genctst(csecond,e,0);		// FTST
    }
    else
    {
	// Result stays where it is; at most the flags need to be set
	if ((*pretregs & mPSW) && !(retregs & mPSW))
	{
	    assert(retregs & mST01);
	    // pop the operands if they are not wanted in ST01 as well
	    cfirst = genctst(cfirst,e,!(*pretregs & mST01));	// FTST
	}
	assert(!(*pretregs & mST01) || (retregs & mST01));
    }

    if (*pretregs & mST01)
    {
	// Record the two halves of e now occupying ST1 and ST0
	note87(e,0,1);
	note87(e,sz/2,0);
    }
    return cat2(cfirst,csecond);
}

/*****************************************
 * Operators OPc_r and OPc_i
 *
 * Evaluates the complex operand e->E1 into ST0/ST1 and drops one of the
 * two entries, leaving a single value in ST0:
 *	OPc_r	pops ST0, so what was in ST1 remains
 *	OPc_i	stores ST0 over ST1 and pops, so what was in ST0 remains
 * Input:
 *	e		the OPc_r/OPc_i elem
 *	pretregs	where the result is wanted
 * Returns:
 *	the generated code
 */

code *cdconvt87(elem *e, regm_t *pretregs)
{
    regm_t retregs = mST01;
    code *c = codelem(e->E1, &retregs, FALSE);

    // Both operators are an FSTP ST(i); they differ only in i
    unsigned sti;
    switch (e->Eoper)
    {
	case OPc_r:
	    sti = 0;			// FPOP
	    break;

	case OPc_i:
	    sti = 1;			// FSTP ST(1)
	    break;

	default:
	    assert(0);
    }
    c = genf2(c,0xDD,0xD8 + sti);
    pop87();

    retregs = mST0;
    return cat2(c, fixresult87(e, retregs, pretregs));
}

/**************************************
 * Load complex operand into ST01 or flags or both.
 *
 * Handles variables and indirections (two FLDs, the second one from half
 * the type's size further on), constants (both parts must be loadable
 * with an FLDx instruction), and the conversion operators OPd_ld, OPld_d,
 * OPf_d and OPd_f, for which the operand itself is loaded.
 * Input:
 *	e		complex elem to load
 *	pretregs	where the result is wanted; the contract requires a
 *			non-empty subset of mST01 | mPSW
 * Returns:
 *	the generated code
 */

code *cload87(elem *e, regm_t *pretregs)
#if __DMC__
__in
{
    assert(I32 && config.inline8087);
    elem_debug(e);
    assert(*pretregs & (mST01 | mPSW));
    assert(!(*pretregs & ~(mST01 | mPSW)));
}
__out (result)
{
}
__body
#endif
{
    tym_t ty = tybasic(e->Ety);
    code *c = NULL;
    code *cpush = NULL;
    code cs;
    unsigned mf;
    unsigned sz;
    unsigned char ldop;
    regm_t retregs;
    symbol *s;
    int i;

    sz = tysize[ty] / 2;		// size of one half of the complex type
    memset(&cs, 0, sizeof(cs));
    if (ADDFWAIT())
	cs.Iflags = CFwait;
    // mf is only set, and only used, for TYcfloat and TYcdouble
    switch (ty)
    {
	case TYcfloat:	    mf = MFfloat;	    break;
	case TYcdouble:	    mf = MFdouble;	    break;
	case TYcldouble:    break;
	default:	    assert(0);
    }
    switch (e->Eoper)
    {
	case OPvar:
	    notreg(e);			// never enregister this variable
	    // fall through
	case OPind:
	    cpush = cat2(push87(), push87());	// room for both halves
	    switch (ty)
	    {
		case TYcfloat:
		case TYcdouble:
		    c = loadea(e,&cs,ESC(mf,1),0,0,0,0);	// FLD var
		    cs.IEVoffset1 += sz;
		    c = gen(c, &cs);				// FLD var+sz
		    break;

		case TYcldouble:
		    c = loadea(e,&cs,0xDB,5,0,0,0);		// FLD var
		    cs.IEVoffset1 += sz;
		    c = gen(c, &cs);				// FLD var+sz
		    break;

		default:
		    assert(0);
	    }
	    retregs = mST01;
	    break;

	case OPd_ld:
	case OPld_d:
	case OPf_d:
	case OPd_f:
	    // Load the operand of the conversion directly and return; no
	    // instruction is generated for the conversion itself
	    c = cload87(e->E1, pretregs);
	    freenode(e->E1);
	    return c;

	case OPconst:
	    cpush = cat2(push87(), push87());	// room for both parts
	    for (i = 0; i < 2; i++)
	    {
		// presumably loadconst() returns the second opcode byte of an
		// FLDx instruction for part i, or 0 if there is none -
		// TODO confirm against loadconst()
		ldop = loadconst(e, i);
		if (ldop)
		{
		    c = genf2(c,0xD9,ldop);		// FLDx
		}
		else
		{
		    assert(0);
		}
	    }
	    retregs = mST01;
	    break;

	default:
#ifdef DEBUG
	    elem_print(e);
#endif
	    assert(0);
    }
    return cat4(cpush,c,fixresult_complex87(e, retregs, pretregs), NULL);
}

#endif // !SPP
